//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
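
// Conceptually, the untied task machinery above turns the outlined task
// function into a resumable state machine keyed on the persistent part id (a
// sketch of the emitted control flow, not literal code):
//
//   switch (*part_id) {
//   case 0: goto .untied.jmp.0;   // first entry into the task body
//   case 1: goto .untied.jmp.1;   // resume after the first scheduling point
//   ...
//   default: goto .untied.done.;  // task already finished
//   }
//
// At each scheduling point emitUntiedSwitch stores the next case index into
// the part id, re-enqueues the task via UntiedCodeGen (a __kmpc_omp_task
// call), and returns control to the runtime.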

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look it up in
    // a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
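
// As a hedged illustration of the uniqueness requirement: the helper name the
// client passes in is typically the offload entry name, of the form
// __omp_offloading_<device-id>_<file-id>_<mangled-function>_l<line>, so that
// the host and device sides agree on one symbol per target region.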

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
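
// Illustrative use of these flags: the implicit barrier emitted at the end of
// a worksharing 'for' region would be described with
// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR, letting the runtime attribute
// the barrier to the right construct in tools and debug output.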

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
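
// For example, '#pragma omp for schedule(dynamic, 4)' selects
// OMP_sch_dynamic_chunked (35), and a 'monotonic' modifier would be encoded
// by OR-ing in OMP_sch_modifier_monotonic, i.e. 35 | (1 << 29); the runtime
// masks the modifier bits back out to recover the base schedule.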

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
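
// For reference, the reduction op recognized above comes from a user-defined
// reduction such as:
//
//   #pragma omp declare reduction(merge : T : omp_out = foo(omp_out, omp_in)) \
//       initializer(omp_priv = T())
//
// where the combiner is modeled in the AST as a call through an
// OpaqueValueExpr that ultimately references the OMPDeclareReductionDecl.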

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
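
// The control flow emitted above is, roughly (using the IR block names
// created in the function):
//
//   entry:                 dest.begin == dest.end ? omp.arrayinit.done
//                                                 : omp.arrayinit.body
//   omp.arrayinit.body:    phi over the current element(s); emit per-element
//                          init; advance src/dest pointers by one element;
//                          reached dest.end ? done : body
//   omp.arrayinit.done:    continue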

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
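
// As an illustration: for 'reduction(+ : a[lb:n])' the element count is
// computed above as (&a[last] - &a[lb]) + 1 via CreatePtrDiff/CreateNUWAdd,
// and the byte size as that count times sizeof(element); both land in
// Sizes[N] and later drive allocation of the private copy.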

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
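
// A small worked example of loadToBegin: for a base of type 'int **' reduced
// with element type 'int', the loop above loads through the pointer twice
// (int ** -> int * -> int) before exiting, and the resulting lvalue addresses
// the first element using the element's IR type.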

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
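
// Sketch of the adjustment above: when the reduction item is an array section
// or subscript such as 'a[5:n]', the private copy holds only the item, yet
// later codegen addresses it through the original base 'a'. The code remaps
// the base into private space as
//   Ptr = private_item + (&shared_base - &shared_item)
// so that indexing the remapped base at the item's original offset lands on
// the private copy.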

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
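
// The generated helpers have the shape (an illustrative sketch; names are
// platform-specific, see getName above):
//
//   void .omp_combiner.(T *restrict omp_out_parm, T *restrict omp_in_parm);
//   void .omp_initializer.(T *restrict omp_priv_parm, T *restrict omp_orig_parm);
//
// with the UDR's omp_in/omp_out (or omp_orig/omp_priv) variables privatized
// to the pointees of the two parameters, as done in emitCombinerOrInitializer.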

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
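
// For example, outlining a parallel region from a function 'foo' typically
// yields a helper named 'foo.omp_outlined' and a reduction function named
// 'foo.omp.reduction.reduction_func'; the exact separator comes from
// createPlatformSpecificName and varies by target.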
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location.
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
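// Worked example of the ident string layout produced above: a location at
// line 10, column 3 of "file.c" inside function "foo" yields
// ";file.c;foo;10;3;;" (fields are separated by ';'; a missing function name
// simply leaves its field empty).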
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
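// Illustrative IR shape (an approximation, not verbatim output): in a plain
// serial function the fallback path above leaves a single cached call next to
// the allocas, which every later getThreadID() in the same function reuses:
//
//   %svcpt = bitcast i32 undef to i32   ; service insert point marker
//   %gtid  = call i32 @__kmpc_global_thread_num(ptr @<ident>)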
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}
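// Sketch, assuming the OpenMPIRBuilder's usual reference-pointer scheme: for
//
//   int g;
//   #pragma omp declare target link(g)
//
// the call above hands back a pointer-to-pointer global (named along the
// lines of "g_decl_tgt_ref_ptr") through which device code reaches the host
// copy of 'g'; the exact name and linkage are decided by the builder.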
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
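// E.g. for 'static int x;' under '#pragma omp threadprivate(x)' (and no TLS),
// each access to 'x' is rewritten into roughly
//
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x),
//                                         &<mangled-x>.cache);
//
// where "<mangled-x>.cache" is the internal cache variable created above.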
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. Must be NULL: the
    // parameter is reserved by the runtime, which currently asserts that it
    // is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
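// Sketch of the result at global scope (no CodeGenFunction): an
// "__omp_threadprivate_init_" helper is emitted that performs, roughly,
//
//   __kmpc_global_thread_num(&loc);  // make sure the runtime is initialized
//   __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);
//
// with 'ctor'/'dtor' being the "__kmpc_global_ctor_"/"__kmpc_global_dtor_"
// helpers generated above, or NULL when not needed.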
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // The bound thread id for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want: every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
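// Putting the two arms together, '#pragma omp parallel if(c)' lowers to
// approximately:
//
//   if (c) {
//     __kmpc_fork_call(&loc, nargs, (kmpc_micro)outlined, ...captured);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &bound_zero, ...captured);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }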
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
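// Typical CommonActionTy usage, mirrored by the emitters below: Enter()/Exit()
// bracket the region with the runtime calls, and Done() closes the guard when
// the enter call is conditional.
//
//   CommonActionTy Action(EnterFn, Args, ExitFn, Args, /*Conditional=*/true);
//   OpGen.setAction(Action);
//   emitInlinedDirective(CGF, Kind, OpGen);
//   Action.Done(CGF);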
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build the function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case, choose a 'static, 1' schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build a call to void __kmpc_error(ident_t *loc, int severity, const char
  // *message).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
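// For example, '#pragma omp error severity(fatal) message("boom")' becomes a
// call like __kmpc_error(&loc, /*severity=*/2, "boom"); with
// severity(warning) the second argument is 1 instead.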
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only 'static' is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}
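// Examples of the mapping: 'schedule(dynamic)' -> OMP_sch_dynamic_chunked,
// 'schedule(static, c)' -> OMP_sch_static_chunked, and with an 'ordered'
// clause the OMP_ord_* variants are selected instead (e.g.
// OMP_ord_dynamic_chunked).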
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
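// The returned value is the schedule enumerator with the modifier bits OR'ed
// in. E.g. 'schedule(nonmonotonic: dynamic, 4)' yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, which is what is
// later passed as the 'schedule' argument of __kmpc_dispatch_init.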
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //     kmp_int[32|64] lower, kmp_int[32|64] upper,
  //     kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //     kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //     kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //     kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
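// The static counterpart brackets the loop with an init/fini pair instead of
// a dispatch loop (an illustrative sketch only; the bounds are passed by
// reference and adjusted in place by the runtime):
//   __kmpc_for_static_init_4(&loc, tid, schedtype, &last, &lb, &ub, &st,
//                            /*incr=*/1, chunk);
//   for (i = lb; i <= ub; ++i)
//     <loop body>;
//   __kmpc_for_static_fini(&loc, tid);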
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
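// A minimal sketch of how the next two clause emitters surface in user code
// (illustrative only): for "#pragma omp parallel num_threads(4)
// proc_bind(close)" the compiler emits, immediately before the fork call,
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_push_proc_bind(&loc, gtid, unsigned(OMP_PROC_BIND_close));
// where the proc_bind argument is the numeric ProcBindKind value.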
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
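// Illustrative inputs for the predicate above (a sketch, not exhaustive):
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)  // -> true
//   int y;
//   #pragma omp allocate(y)                                   // -> false
// The second form uses the implicit default allocator with no allocator
// expression, which is exactly the case the predicate excludes.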
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  //         For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
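/// For example (an illustrative source-level sketch), the iterator modifier in
/// \code
/// #pragma omp task depend(iterator(i = 0:n), in: a[i])
/// \endcode
/// requires a loop over i = 0..n-1 around the code that fills in the
/// runtime's dependence/affinity arrays. This scope emits the counter init,
/// bound check, and update blocks for each iterator in the constructor, and
/// closes them in reverse order in the destructor.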
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type.
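/// The record mirrors the runtime's affinity descriptor; an illustrative
/// sketch of the resulting layout (field names follow the
/// RTLAffinityInfoFieldsTy enum used when filling it in below):
/// \code
/// struct kmp_task_affinity_info_t {
///   intptr_t base_addr;
///   size_t len;
///   uint32_t flags;
/// };
/// \endcode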
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
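  // The descending sort by alignment minimizes padding in .kmp_privates.t.
  // E.g. (illustrative) privates {char c; double d; int i;} are laid out as
  // d (align 8), i (align 4), c (align 1), so every field offset is already
  // suitably aligned without interior padding bytes.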
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
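  // E.g. (illustrative) "#pragma omp task priority(p) detach(evt)" on a tied
  // task with destructible privates yields
  //   Flags = TiedFlag | DestructorsFlag | PriorityFlag | DetachableFlag
  // in the computation below; "final(cond)" is folded in separately via a
  // select, because its value may only be known at run time.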
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
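  // Putting the allocation together for "#pragma omp target nowait device(d)"
  // (an illustrative sketch): the device ID is appended to the usual argument
  // list and the task is allocated with
  //   __kmpc_omp_target_task_alloc(&loc, gtid, flags, sizeof_kmp_task_t,
  //                                sizeof_shareds, &.omp_task_entry., d);
  // instead of __kmpc_omp_task_alloc.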
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
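  // For reference, affinity clauses in source look like (illustrative only;
  // they hint where the task should preferably execute):
  //   #pragma omp task affinity(a, b[0:n])
  //   #pragma omp task affinity(iterator(i = 0 : n) : a[i])
  // Each list item becomes one kmp_task_affinity_info entry in the array
  // registered above.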
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
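// For reference, the source-level clauses that reach this mapping look like
// (illustrative only):
//   #pragma omp task depend(in : x) depend(out : y) depend(inout : z)
//   #pragma omp task depend(mutexinoutset : m) depend(inoutset : s)
// Note that 'out' and 'inout' intentionally collapse to the same runtime
// kind, as the switch above encodes.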
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
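// A minimal sketch of the depobj storage layout assumed here (it matches what
// emitDepobjDependClause below allocates):
//
//   kmp_depend_info deps[N + 1];
//   deps[0].base_addr = N;   // element count, stashed in the first slot
//   // deps[1] .. deps[N]    // the actual dependence entries
//
// The omp_depend_t handle points at deps[1], so stepping back one element
// (the GEP with index -1 above) recovers the count.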
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // Memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
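// For reference, the depobj flow these helpers serve looks like (illustrative
// only):
//   omp_depend_t o;
//   #pragma omp depobj(o) depend(inout : x)  // creates the kmp_depend_info
//   #pragma omp task depend(depobj : o)      // entries are memcpy'd here
// The sizes helper reads each depobj's element count so the final dependence
// array for the task can be allocated up front.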
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
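// For reference, an iterator modifier expands one depend clause into a
// runtime-computed number of entries, which is why the array above may need
// to be variable-sized (illustrative only):
//   #pragma omp task depend(iterator(i = 0 : n), in : a[i])
// produces n kmp_depend_info entries, one per value of i.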
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj, reserve one extra element to store the number of elements.
  // This is required to handle the 'depobj(x) update(in)' construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
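// For reference, the construct handled here looks like (illustrative only):
//   #pragma omp depobj(o) destroy
// Stepping back one element before freeing releases the whole allocation,
// including the hidden count slot in front of the entries.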
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
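// For reference, the construct handled here looks like (illustrative only):
//   #pragma omp depobj(o) update(mutexinoutset)
// The loop above rewrites the flags field of every entry in o's dependence
// array to the new kind; addresses and lengths are left untouched.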
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build a return for the untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
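// For reference, the two paths above correspond to (illustrative only):
//   #pragma omp task if(cond) depend(in : x)
// When 'cond' is true the task is enqueued via __kmpc_omp_task{_with_deps};
// when it is false the else path first waits on the dependences and then runs
// the task body immediately between begin_if0/complete_if0 (an undeferred
// task).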
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
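// For reference, the sched/grainsize argument pair above encodes
// (illustrative only):
//   #pragma omp taskloop grainsize(G)  // sched = Grainsize, grainsize = G
//   #pragma omp taskloop num_tasks(N)  // sched = NumTasks,  grainsize = N
//   #pragma omp taskloop               // sched = NoSchedule, runtime chunks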
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
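// For reference, a directive that reaches emitReduction looks like
// (illustrative only):
//   #pragma omp parallel for reduction(+ : sum) reduction(max : m)
// Case 1 runs the tree reduction via reduce_func under the runtime's
// protocol; case 2 falls back to per-item atomics, or to a named critical
// region when no simple atomic form exists.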
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
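// For illustration: with Prefix "reduction_size" and a local variable 'x'
// whose canonical declaration starts at raw location 43, the helper above
// would produce a name of roughly the form "reduction_size.x_43" (the exact
// suffix depends on the SourceLocation encoding).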
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
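// For illustration: for reduction(+ : x) with 'int x', the generated
// @.red_init. body stores the '+' identity (zero) through the %arg pointer;
// when a 'declare reduction' initializer is in play, %orig additionally
// carries the address of the original item (what the initializer sees as
// omp_orig).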
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
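// For illustration: for reduction(+ : x) the combiner emitted above amounts
// to '*%arg0 = *%arg0 + *%arg1', i.e. the in/out item on the left-hand side
// is updated with the incoming item on the right-hand side.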
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
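// For illustration: a construct such as
//   #pragma omp taskgroup task_reduction(+ : x)
// reaches emitTaskReductionInit below with Data.ReductionVars = {x}; one
// kmp_taskred_input_t entry is filled in per reduction item and the whole
// array is handed to the runtime in a single registration call.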
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available in those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
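// For illustration: with the 'task' reduction modifier, as in
//   #pragma omp parallel for reduction(task, + : x)
// Data.IsReductionWithTaskMod is set and the registration above goes through
// __kmpc_taskred_modifier_init; the matching teardown call is emitted by
// emitTaskReductionFini below.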
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
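// For illustration: a bare '#pragma omp taskwait' takes the
// __kmpc_omp_taskwait (or OpenMPIRBuilder) path above, whereas
//   #pragma omp taskwait depend(in : a)
// first materializes its dependence array and then calls
// __kmpc_omp_taskwait_deps_51.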
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
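// For illustration: '#pragma omp cancellation point sections' is lowered
// roughly as
//   %0 = call i32 @__kmpc_cancellationpoint(ptr @loc, i32 %gtid, i32 3)
// where 3 is CancelSections from the RTCancelKind mapping above, followed by
// the conditional branch to the '.cancel.exit' block.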
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
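// For illustration: on
//   #pragma omp target uses_allocators(omp_default_mem_alloc, a(tr))
// only the allocator carrying a traits array ('a' here) is recorded by
// emitTargetOutlinedFunction below, so __kmpc_init_allocator and
// __kmpc_destroy_allocator bracket the target region for it; predefined
// allocators without traits are skipped.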
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
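// For illustration: assuming a clause such as
//   ompx_attribute(__attribute__((launch_bounds(128, 2))))
// on a target directive, handleCUDALaunchBoundsAttr above reports a maximum
// of 128 threads and a minimum of 2 blocks, which the min/max folding turns
// into MaxThreadsVal <= 128 and MinTeamsVal >= 2.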
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
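// For illustration: for a captured body like
//   { int unused = 0; ; #pragma omp teams distribute ... }
// getSingleCompoundChild above returns the nested teams directive: the unused
// local declaration and the null statement are discarded by the triviality
// checks, leaving exactly one interesting child.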
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
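// For illustration: for
//   #pragma omp target teams num_teams(4)
// the constant folds so that MinTeamsVal == MaxTeamsVal == 4 and the
// num_teams expression is returned; a bare '#pragma omp target' whose body
// contains no nested construct instead reports -1/-1, signalling that no
// teams region needs to be emitted.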
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
/// respective expression evaluation is not performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread-limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
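// For illustration: for
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(8)
// getNumThreads above folds the constant 8 into the upper bound and reports
// the condition, so the final count follows
//   <cond> ? (<numthreads> ? <numthreads> : 0) : 1
// i.e. 'c ? 8 : 1' once materialized by emitNumThreadsForTargetDirective.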
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread-limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression has already been
    // handled above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
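// For illustration: with both thread_limit(16) and num_threads(32) present,
// the select sequence above computes min(16, 32) = 16 as the final thread
// count handed to the offloading runtime.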
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
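  // For illustration: assuming OMP_MAP_MEMBER_OF occupies the 16 most
  // significant bits of the 64-bit map-type flags (0xffff000000000000), the
  // loop above counts 48 trailing zero bits and this returns 48.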
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  ///   [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                      HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
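  // Illustrative sketch (hypothetical values): for `double d[100]`, the
  // section d[20:30] takes the explicit-length path above and yields
  // 30*sizeof(double) = 240 bytes, while d[20:] takes the
  // array_section[lb:] path and yields 100*sizeof(double) -
  // 20*sizeof(double) = 640 bytes, clamped to 0 by the select if the lower
  // bound were past the end of the base array.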
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
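  // Illustrative sketch (hypothetical expressions): given `int a[10]` and
  // `float *p`, a[2:1] has constant length 1 and is not final, a[2:] covers
  // the rest of a dimension of constant size 10 and is final, and p[0:n]
  // has a length that cannot be evaluated to a constant, so it is
  // conservatively treated as final.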
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    //   struct S2 s;
    //   s.ps->ps->ps->f[:]
    //     (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole
    // struct.
    // In that case we need to skip adding an entry for the struct to the
    // CombinedInfo list and instead add an entry to the
    // StructBaseCombinedInfo list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that; otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. We have to do
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be added
        // in the first position before any data internal to the struct is
        // being mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData || IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list, allocate the space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect the size information for each dimension and get the element
    // size as the first stride. For example, for `int arr[10][10]`, the
    // DimSizes should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous data. Notice that offset, count, and
    // stride are only meaningful for an array section, so we insert a null
    // for anything other than an array section.
    // Also, the offsets, counts, and strides arrays are not the same size as
    // the pointers, base_pointers, sizes, or dims arrays; instead, their size
    // equals the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once an outer dimension is an array section, all of the
        // inner dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is actually not in array-section
        // form according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //         Offset  Count  Stride
      //    D0     0       1      4    (int)  <- dummy dimension
      //    D1     0       2      8    (2 * (1) * 4)
      //    D2     1       2      20   (1 * (1 * 5) * 4)
      //    D3     0       2      200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
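  // Illustrative sketch (hypothetical captures): for firstprivate(q) where
  // `q` is a pointer, the capture above is mapped TO | PTR_AND_OBJ; for a
  // firstprivate non-pointer it is mapped PRIVATE | TO; a capture that is
  // neither firstprivate nor a mapped lambda falls back to TO | FROM.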
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
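  // Illustrative sketch (hypothetical types): for
  //   struct B { int b; };
  //   struct D : B { int x, y; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) recurses into the non-empty,
  // non-virtual base B first (appending B::b) and then appends D::x and D::y
  // in LLVM field order, skipping empty bases and bitfields.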
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
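    // Illustrative sketch (hypothetical clauses): for
    //   map(present, to: s.x) map(from: s.y)
    // both component lists land in Info[&s] (one bucket under Present, one
    // under Other), so `s` is handled as a single chunk below and its member
    // entries can be given consistent flags.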
    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as an array subscript, array section
          // or array shaping. The base address is passed as a pointer to the
          // base in this case and cannot be used as a base for a
          // use_device_ptr list item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
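    // For illustration (hypothetical user code, not from this file): given
    // \code
    //   #pragma omp target data map(tofrom : p[0:n]) use_device_ptr(p)
    //   { ... }
    // \endcode
    // the existing map entry for 'p' is simply flagged ReturnDevicePointer by
    // IsMapInfoExist above; if 'p' had not been mapped at all, MapInfoGen
    // would instead create a zero-size 'alloc'-style entry for it.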
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ std::nullopt,
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
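      // For illustration (hypothetical user code, not from this file), this
      // branch covers cases like the following, assuming 'Derived' has a base
      // class:
      // \code
      //   struct Derived : Base {
      //     int X;
      //     void run() {
      //   #pragma omp target map(tofrom : X) // treated as this[:1] per rule
      //       { ++X; }
      //     }
      //   };
      // \endcode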
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef &&
          !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
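  // For illustration (hypothetical user code, not from this file): the two
  // helpers above handle mapping a lambda object, e.g.
  // \code
  //   int X = 0;
  //   auto L = [&X]() { X++; };
  //   #pragma omp target map(to : L)
  //   L();
  // \endcode
  // where the PTR_AND_OBJ entries rewire the by-reference capture of 'X'
  // inside the device copy of 'L' to the corresponding device address.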
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating component info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // The same applies if one component is a base and another component
          // is a dereferenced pointer MemberExpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }
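    // For illustration (hypothetical user code, not from this file): with
    // \code
    //   struct T { int A; int B; } S;
    //   #pragma omp target map(tofrom : S) map(to : S.A)
    // \endcode
    // the component lists for 'S' and 'S.A' share their head, so 'S' becomes
    // the base entry and 'S.A' is recorded above as one of its overlapped
    // elements, sorted here in field-layout order.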
    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ std::nullopt,
            HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime the captures that are passed by
        // value and are not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
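// For illustration (hypothetical user code, not from this file): for an
// expression such as
// \code
//   #pragma omp target map(tofrom : this->x[0:n])
// \endcode
// the helper above returns the member declaration 'x', so the mapping name
// emitted below can refer to the member rather than the whole expression.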
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
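  // For illustration (hypothetical user code, not from this file): a mapper
  // declaration this routine lowers, e.g.
  // \code
  //   struct Vec { int Len; double *Data; };
  //   #pragma omp declare mapper(id : Vec V) map(V, V.Data[0 : V.Len])
  // \endcode
  // becomes a .omp_mapper.* function with the signature sketched above,
  // invoked once per mapped Vec element.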
  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program.
    // According to the OMP_MAP_TO and OMP_MAP_FROM bits of the \a MapType,
    // which is the input argument of the mapper function, the following code
    // will set the OMP_MAP_TO and OMP_MAP_FROM bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
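    // For illustration (hypothetical user code, not from this file): if the
    // mapper declares
    // \code
    //   #pragma omp declare mapper(id : Vec V) map(to : V.Data[0 : V.Len])
    // \endcode
    // and the construct uses map(mapper(id), from : W), then LeftToFrom is
    // OMP_MAP_FROM here, the IsFrom branch clears OMP_MAP_TO from the
    // member's type, and the member effectively decays to 'alloc', matching
    // the 'to' row / 'from' column of the table above.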
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
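  // A sketch of the guarding conditions computed below (illustrative):
  //   init:   (Size > 1 || (Base != Begin && PTR_AND_OBJ)) && !DELETE
  //   delete: (Size > 1) && DELETE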
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
  // the memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
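  // For reference, the entry point invoked here has, roughly, this shape
  // (assumed from the argument list below, not quoted from the runtime
  // headers):
  //   void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
  //                                    void *begin, int64_t size,
  //                                    int64_t type, void *name);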
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
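  // The device clause may carry a modifier (OpenMP 5.0), e.g. (illustrative):
  //   #pragma omp target device(device_num: 2)  // handled right here
  //   #pragma omp target device(ancestor: 1)    // reverse offload; falls back
  //                                             // in emitTargetCallKernelLaunch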
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                               CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
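    // (For instance — illustrative only — in
    //    int n = ...; int vla[n];
    //    #pragma omp target map(vla[:n])
    //  the VLA bound 'n' is such a by-copy capture.)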
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captured because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
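  // "The arrays" here are the parallel offload arrays consumed by the
  // __tgt_target_* entry points: base pointers, section pointers, sizes,
  // map types, map names, and mapper functions (see TargetDataRTArgs below).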
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

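    // emitKernelLaunch wraps the actual launch; conceptually (a sketch, not
    // the exact IR it produces) it amounts to:
    //   %rc = call i32 @__tgt_target_kernel(%RTLoc, %DeviceID, %NumTeams,
    //                                       %NumThreads, @OutlinedFnID, %Args)
    //   if (%rc != 0)  ->  run EmitTargetCallFallbackCB (host version)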
    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {
  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

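  // Two code paths are prepared below: TargetThenGen attempts the offload via
  // the kernel-launch helper above, while TargetElseGen runs the host
  // fallback; which one executes is decided by the if clause and by whether
  // an outlined function ID exists at all.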
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
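  // E.g. (illustrative) for a global 'S GV;' whose constructor body contains
  // '#pragma omp target', that region is discovered below with the mangled
  // name of the complete constructor S::S() as its parent name.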
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *Ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(Ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target; it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit).
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of the
  // argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
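      // E.g. (illustrative): 'linear(p: 4)' mangles as "l4", a step of -2 as
      // "ln2", 'linear(ref(r))' as "R", 'uniform(u)' as "u", and a plain
      // vector parameter as "v".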
/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
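// Illustrative manglings produced by the rules above (assumed examples):
// a uniform parameter yields "u", a vector parameter "v", linear(x:2)
// "l2", linear(x) with step 1 just "l", a step of -4 "ln4", a variable
// step held in parameter 1 "ls1" (or "Rs1" for a linear reference), and
// aligned(x:32) appends "a32".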
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
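// Illustrative example (assumed C-linkage 'foo'): for
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// with no simdlen, the CDT is 'double' (64 bits), so the attributes
// "_ZGVbN2v_foo" (SSE, 128/64), "_ZGVcN4v_foo" (AVX), "_ZGVdN4v_foo"
// (AVX2) and "_ZGVeN8v_foo" (AVX512) are added to the function.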
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
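// Illustrative example (assumed): for 'double foo(float x)' the lane sizes
// are LS(return) = 64 and LS(x) = 32, so NDS = 32 and WDS = 64. For a
// uniform or linear 'int *p' parameter, the LS would instead be the size
// of the pointee, 32.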
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
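// Illustrative example (assumed): for 'float foo(float x)' the NDS is 32,
// so an unmasked ('N') Advanced SIMD variant gets both the 64-bit and
// 128-bit vector names, "_ZGVnN2v_foo" and "_ZGVnN4v_foo".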
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
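// Illustrative examples (assumed, for 'double foo(double x)'): with no
// simdlen and ISA 's', the scalable masked name "_ZGVsMxv_foo" is added;
// with simdlen(8) and WDS = 64, 8 * 64 = 512 is a multiple of 128 and at
// most 2048, so "_ZGVsM8v_foo" is added instead.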
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
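// Illustrative end-to-end example (assumed): for
//   #pragma omp declare simd uniform(n) linear(p:1) aligned(p:16)
//   void foo(float *p, int n);
// 'p' becomes Linear with the step rescaled by sizeof(float) to 4 and with
// alignment 16, and 'n' becomes Uniform, so the parameter sequence mangles
// to "l4a16u" in the generated vector names.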
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty =
      C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
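// Illustrative sketch (assumed) of the calls emitted for
// '#pragma omp for ordered(1)' with N iterations:
//   kmp_dim dims[1] = {};            // 'lo' stays 0 from null-initialization
//   dims[0].up = N; dims[0].st = 1;  // only upper bound and stride are set
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims[0]);
//   ... loop ...
//   __kmpc_doacross_fini(&loc, gtid); // emitted via the pushed cleanup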
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
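// Illustrative sketch (assumed): within the loop body,
// '#pragma omp ordered depend(source)' lowers to
//   __kmpc_doacross_post(&loc, gtid, cnt); // cnt[] holds the current iteration
// while '#pragma omp ordered depend(sink : i - 1)' lowers to
//   __kmpc_doacross_wait(&loc, gtid, cnt); // cnt[] holds i - 1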
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert it to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
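// Illustrative sketch (assumed) of the code emitted for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(16)
// inside a function:
//   void *p = __kmpc_aligned_alloc(gtid, 16, sizeof(int), allocator);
//   ... 'a' is accessed through p ...
//   __kmpc_free(gtid, p, allocator);  // emitted via the pushed cleanup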
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}
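// Illustrative sketch (assumed): for a conditional lastprivate 'int a' this
// materializes a per-function temporary of the shape
//   struct lastprivate.conditional { int a; char Fired; };
// where 'Fired' is reset to 0 here and set to 1 whenever the privatized
// 'a' is updated in the region.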
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
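
// Reduction is the one construct handled for real in SIMD-only mode: the
// assert below documents that only "simple" reductions reach this point,
// and those are forwarded to the base class, which emits them inline
// without any runtime-library support.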
llvm_unreachable("Not supported in SIMD-only mode"); 12060 } 12061 emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)12062 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12063 SourceLocation Loc, 12064 OpenMPDirectiveKind DKind) { 12065 llvm_unreachable("Not supported in SIMD-only mode"); 12066 } 12067 emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)12068 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12069 SourceLocation Loc, 12070 unsigned IVSize, bool IVSigned, 12071 Address IL, Address LB, 12072 Address UB, Address ST) { 12073 llvm_unreachable("Not supported in SIMD-only mode"); 12074 } 12075 emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)12076 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12077 llvm::Value *NumThreads, 12078 SourceLocation Loc) { 12079 llvm_unreachable("Not supported in SIMD-only mode"); 12080 } 12081 emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)12082 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12083 ProcBindKind ProcBind, 12084 SourceLocation Loc) { 12085 llvm_unreachable("Not supported in SIMD-only mode"); 12086 } 12087 getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)12088 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12089 const VarDecl *VD, 12090 Address VDAddr, 12091 SourceLocation Loc) { 12092 llvm_unreachable("Not supported in SIMD-only mode"); 12093 } 12094 emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)12095 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12096 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12097 CodeGenFunction *CGF) { 12098 llvm_unreachable("Not supported in SIMD-only mode"); 12099 } 12100 getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)12101 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12102 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12103 llvm_unreachable("Not supported in SIMD-only mode"); 12104 } 12105 emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * > Vars,SourceLocation Loc,llvm::AtomicOrdering AO)12106 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12107 ArrayRef<const Expr *> Vars, 12108 SourceLocation Loc, 12109 llvm::AtomicOrdering AO) { 12110 llvm_unreachable("Not supported in SIMD-only mode"); 12111 } 12112 emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12113 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12114 const OMPExecutableDirective &D, 12115 llvm::Function *TaskFunction, 12116 QualType SharedsTy, Address Shareds, 12117 const Expr *IfCond, 12118 const OMPTaskDataTy &Data) { 12119 llvm_unreachable("Not supported in SIMD-only mode"); 12120 } 12121 emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12122 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12123 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12124 
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
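
// The two emitDoacrossOrdered overloads cover the legacy
// depend(source)/depend(sink : ...) spelling and the OpenMP 5.2 'doacross'
// clause, respectively; neither form is supported in SIMD-only mode.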
void
CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                         const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
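
// Parameter translation remaps an outlined function's parameters when a
// target device needs a different representation than the host; SIMD-only
// compilation never performs offloading, so both hooks stay unreachable.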
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}