1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/CodeGen/ConstantInitBuilder.h" 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/SetOperations.h" 27 #include "llvm/Bitcode/BitcodeReader.h" 28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/GlobalValue.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Support/Format.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include <cassert> 35 36 using namespace clang; 37 using namespace CodeGen; 38 using namespace llvm::omp; 39 40 namespace { 41 /// Base class for handling code generation inside OpenMP regions. 42 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 43 public: 44 /// Kinds of OpenMP regions used in codegen. 45 enum CGOpenMPRegionKind { 46 /// Region with outlined function for standalone 'parallel' 47 /// directive. 48 ParallelOutlinedRegion, 49 /// Region with outlined function for standalone 'task' directive. 50 TaskOutlinedRegion, 51 /// Region for constructs that do not require function outlining, 52 /// like 'for', 'sections', 'atomic' etc. directives. 53 InlinedRegion, 54 /// Region with outlined function for standalone 'target' directive. 55 TargetRegion, 56 }; 57 58 CGOpenMPRegionInfo(const CapturedStmt &CS, 59 const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 63 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 64 65 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 69 Kind(Kind), HasCancel(HasCancel) {} 70 71 /// Get a variable or parameter for storing global thread id 72 /// inside OpenMP construct. 73 virtual const VarDecl *getThreadIDVariable() const = 0; 74 75 /// Emit the captured statement body. 76 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 77 78 /// Get an LValue for the current ThreadID variable. 79 /// \return LValue for thread id variable. This LValue always has type int32*. 
80 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 81 82 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 83 84 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 85 86 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 87 88 bool hasCancel() const { return HasCancel; } 89 90 static bool classof(const CGCapturedStmtInfo *Info) { 91 return Info->getKind() == CR_OpenMP; 92 } 93 94 ~CGOpenMPRegionInfo() override = default; 95 96 protected: 97 CGOpenMPRegionKind RegionKind; 98 RegionCodeGenTy CodeGen; 99 OpenMPDirectiveKind Kind; 100 bool HasCancel; 101 }; 102 103 /// API for captured statement code generation in OpenMP constructs. 104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 105 public: 106 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 107 const RegionCodeGenTy &CodeGen, 108 OpenMPDirectiveKind Kind, bool HasCancel, 109 StringRef HelperName) 110 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 111 HasCancel), 112 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 113 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 114 } 115 116 /// Get a variable or parameter for storing global thread id 117 /// inside OpenMP construct. 118 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 119 120 /// Get the name of the capture helper. 121 StringRef getHelperName() const override { return HelperName; } 122 123 static bool classof(const CGCapturedStmtInfo *Info) { 124 return CGOpenMPRegionInfo::classof(Info) && 125 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 126 ParallelOutlinedRegion; 127 } 128 129 private: 130 /// A variable or parameter storing global thread id for OpenMP 131 /// constructs. 132 const VarDecl *ThreadIDVar; 133 StringRef HelperName; 134 }; 135 136 /// API for captured statement code generation in OpenMP constructs. 137 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 138 public: 139 class UntiedTaskActionTy final : public PrePostActionTy { 140 bool Untied; 141 const VarDecl *PartIDVar; 142 const RegionCodeGenTy UntiedCodeGen; 143 llvm::SwitchInst *UntiedSwitch = nullptr; 144 145 public: 146 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 147 const RegionCodeGenTy &UntiedCodeGen) 148 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 149 void Enter(CodeGenFunction &CGF) override { 150 if (Untied) { 151 // Emit task switching point. 
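// As an illustration (not emitted verbatim), for an untied task whose body
// contains one extra switching point, the dispatch built below behaves
// roughly like:
//
//   switch (*part_id) {            // loaded from PartIDVar below
//   default: goto .untied.done.;   // branch to the return through cleanups
//   case 0:  goto .untied.jmp.0;   // start of the task body
//   case 1:  goto .untied.jmp.1;   // resume point added by emitUntiedSwitch
//   }
//
// Each call to emitUntiedSwitch stores the next case number into *part_id,
// returns from the current task part, and adds the corresponding resume
// block as a new case.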
152 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 153 CGF.GetAddrOfLocalVar(PartIDVar), 154 PartIDVar->getType()->castAs<PointerType>()); 155 llvm::Value *Res = 156 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 157 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 158 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 159 CGF.EmitBlock(DoneBB); 160 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 161 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 162 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 163 CGF.Builder.GetInsertBlock()); 164 emitUntiedSwitch(CGF); 165 } 166 } 167 void emitUntiedSwitch(CodeGenFunction &CGF) const { 168 if (Untied) { 169 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 170 CGF.GetAddrOfLocalVar(PartIDVar), 171 PartIDVar->getType()->castAs<PointerType>()); 172 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 173 PartIdLVal); 174 UntiedCodeGen(CGF); 175 CodeGenFunction::JumpDest CurPoint = 176 CGF.getJumpDestInCurrentScope(".untied.next."); 177 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 178 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 179 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 CGF.Builder.GetInsertBlock()); 181 CGF.EmitBranchThroughCleanup(CurPoint); 182 CGF.EmitBlock(CurPoint.getBlock()); 183 } 184 } 185 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 186 }; 187 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 188 const VarDecl *ThreadIDVar, 189 const RegionCodeGenTy &CodeGen, 190 OpenMPDirectiveKind Kind, bool HasCancel, 191 const UntiedTaskActionTy &Action) 192 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 193 ThreadIDVar(ThreadIDVar), Action(Action) { 194 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 195 } 196 197 /// Get a variable or parameter for storing global thread id 198 /// inside OpenMP construct. 199 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 200 201 /// Get an LValue for the current ThreadID variable. 202 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 203 204 /// Get the name of the capture helper. 205 StringRef getHelperName() const override { return ".omp_outlined."; } 206 207 void emitUntiedSwitch(CodeGenFunction &CGF) override { 208 Action.emitUntiedSwitch(CGF); 209 } 210 211 static bool classof(const CGCapturedStmtInfo *Info) { 212 return CGOpenMPRegionInfo::classof(Info) && 213 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 214 TaskOutlinedRegion; 215 } 216 217 private: 218 /// A variable or parameter storing global thread id for OpenMP 219 /// constructs. 220 const VarDecl *ThreadIDVar; 221 /// Action for emitting code for untied tasks. 222 const UntiedTaskActionTy &Action; 223 }; 224 225 /// API for inlined captured statement code generation in OpenMP 226 /// constructs. 227 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 228 public: 229 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 230 const RegionCodeGenTy &CodeGen, 231 OpenMPDirectiveKind Kind, bool HasCancel) 232 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 233 OldCSI(OldCSI), 234 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 235 236 // Retrieve the value of the context parameter. 
237 llvm::Value *getContextValue() const override {
238 if (OuterRegionInfo)
239 return OuterRegionInfo->getContextValue();
240 llvm_unreachable("No context value for inlined OpenMP region");
241 }
242
243 void setContextValue(llvm::Value *V) override {
244 if (OuterRegionInfo) {
245 OuterRegionInfo->setContextValue(V);
246 return;
247 }
248 llvm_unreachable("No context value for inlined OpenMP region");
249 }
250
251 /// Lookup the captured field decl for a variable.
252 const FieldDecl *lookup(const VarDecl *VD) const override {
253 if (OuterRegionInfo)
254 return OuterRegionInfo->lookup(VD);
255 // If there is no outer outlined region, there is no need to look the
256 // variable up in a list of captured variables; we can use the original one.
257 return nullptr;
258 }
259
260 FieldDecl *getThisFieldDecl() const override {
261 if (OuterRegionInfo)
262 return OuterRegionInfo->getThisFieldDecl();
263 return nullptr;
264 }
265
266 /// Get a variable or parameter for storing global thread id
267 /// inside OpenMP construct.
268 const VarDecl *getThreadIDVariable() const override {
269 if (OuterRegionInfo)
270 return OuterRegionInfo->getThreadIDVariable();
271 return nullptr;
272 }
273
274 /// Get an LValue for the current ThreadID variable.
275 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
276 if (OuterRegionInfo)
277 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
278 llvm_unreachable("No LValue for inlined OpenMP construct");
279 }
280
281 /// Get the name of the capture helper.
282 StringRef getHelperName() const override {
283 if (auto *OuterRegionInfo = getOldCSI())
284 return OuterRegionInfo->getHelperName();
285 llvm_unreachable("No helper name for inlined OpenMP construct");
286 }
287
288 void emitUntiedSwitch(CodeGenFunction &CGF) override {
289 if (OuterRegionInfo)
290 OuterRegionInfo->emitUntiedSwitch(CGF);
291 }
292
293 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
294
295 static bool classof(const CGCapturedStmtInfo *Info) {
296 return CGOpenMPRegionInfo::classof(Info) &&
297 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
298 }
299
300 ~CGOpenMPInlinedRegionInfo() override = default;
301
302 private:
303 /// CodeGen info about outer OpenMP region.
304 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
305 CGOpenMPRegionInfo *OuterRegionInfo;
306 };
307
308 /// API for captured statement code generation in OpenMP target
309 /// constructs. For these captures, implicit parameters are used instead of the
310 /// captured fields. The name of the target region has to be unique in a given
311 /// application so it is provided by the client, because only the client has
312 /// the information to generate that.
313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
314 public:
315 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
316 const RegionCodeGenTy &CodeGen, StringRef HelperName)
317 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
318 /*HasCancel=*/false),
319 HelperName(HelperName) {}
320
321 /// This is unused for target regions because each starts executing
322 /// with a single thread.
323 const VarDecl *getThreadIDVariable() const override { return nullptr; }
324
325 /// Get the name of the capture helper.
326 StringRef getHelperName() const override { return HelperName; }
327
328 static bool classof(const CGCapturedStmtInfo *Info) {
329 return CGOpenMPRegionInfo::classof(Info) &&
330 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
331 }
332
333 private:
334 StringRef HelperName;
335 };
336
337 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
338 llvm_unreachable("No codegen for expressions");
339 }
340 /// API for generation of expressions captured in an innermost OpenMP
341 /// region.
342 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
343 public:
344 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
345 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
346 OMPD_unknown,
347 /*HasCancel=*/false),
348 PrivScope(CGF) {
349 // Make sure the globals captured in the provided statement are local by
350 // using the privatization logic. We assume the same variable is not
351 // captured more than once.
352 for (const auto &C : CS.captures()) {
353 if (!C.capturesVariable() && !C.capturesVariableByCopy())
354 continue;
355
356 const VarDecl *VD = C.getCapturedVar();
357 if (VD->isLocalVarDeclOrParm())
358 continue;
359
360 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
361 /*RefersToEnclosingVariableOrCapture=*/false,
362 VD->getType().getNonReferenceType(), VK_LValue,
363 C.getLocation());
364 PrivScope.addPrivate(
365 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
366 }
367 (void)PrivScope.Privatize();
368 }
369
370 /// Lookup the captured field decl for a variable.
371 const FieldDecl *lookup(const VarDecl *VD) const override {
372 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
373 return FD;
374 return nullptr;
375 }
376
377 /// Emit the captured statement body.
378 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
379 llvm_unreachable("No body for expressions");
380 }
381
382 /// Get a variable or parameter for storing global thread id
383 /// inside OpenMP construct.
384 const VarDecl *getThreadIDVariable() const override {
385 llvm_unreachable("No thread id for expressions");
386 }
387
388 /// Get the name of the capture helper.
389 StringRef getHelperName() const override {
390 llvm_unreachable("No helper name for expressions");
391 }
392
393 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
394
395 private:
396 /// Private scope to capture global variables.
397 CodeGenFunction::OMPPrivateScope PrivScope;
398 };
399
400 /// RAII for emitting code of OpenMP constructs.
401 class InlinedOpenMPRegionRAII {
402 CodeGenFunction &CGF;
403 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
404 FieldDecl *LambdaThisCaptureField = nullptr;
405 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
406
407 public:
408 /// Constructs region for combined constructs.
409 /// \param CodeGen Code generation sequence for combined directives. Includes
410 /// a list of functions used for code generation of implicitly inlined
411 /// regions.
412 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
413 OpenMPDirectiveKind Kind, bool HasCancel)
414 : CGF(CGF) {
415 // Start emission for the construct.
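// The constructor below stashes the enclosing region's capture state (the
// old CapturedStmtInfo, the lambda capture fields, and the block info); the
// destructor restores it, so these RAII objects can nest for combined
// directives.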
416 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
417 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
418 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
419 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
420 CGF.LambdaThisCaptureField = nullptr;
421 BlockInfo = CGF.BlockInfo;
422 CGF.BlockInfo = nullptr;
423 }
424
425 ~InlinedOpenMPRegionRAII() {
426 // Restore original CapturedStmtInfo only if we're done with code emission.
427 auto *OldCSI =
428 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
429 delete CGF.CapturedStmtInfo;
430 CGF.CapturedStmtInfo = OldCSI;
431 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
433 CGF.BlockInfo = BlockInfo;
434 }
435 };
436
437 /// Values for bit flags used in the ident_t to describe the fields.
438 /// All enumerated elements are named and described in accordance with the code
439 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
440 enum OpenMPLocationFlags : unsigned {
441 /// Use trampoline for internal microtask.
442 OMP_IDENT_IMB = 0x01,
443 /// Use c-style ident structure.
444 OMP_IDENT_KMPC = 0x02,
445 /// Atomic reduction option for kmpc_reduce.
446 OMP_ATOMIC_REDUCE = 0x10,
447 /// Explicit 'barrier' directive.
448 OMP_IDENT_BARRIER_EXPL = 0x20,
449 /// Implicit barrier in code.
450 OMP_IDENT_BARRIER_IMPL = 0x40,
451 /// Implicit barrier in 'for' directive.
452 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
453 /// Implicit barrier in 'sections' directive.
454 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
455 /// Implicit barrier in 'single' directive.
456 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
457 /// Call of __kmp_for_static_init for static loop.
458 OMP_IDENT_WORK_LOOP = 0x200,
459 /// Call of __kmp_for_static_init for sections.
460 OMP_IDENT_WORK_SECTIONS = 0x400,
461 /// Call of __kmp_for_static_init for distribute.
462 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
463 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
464 };
465
466 namespace {
467 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
468 /// Values for bit flags for marking which requires clauses have been used.
469 enum OpenMPOffloadingRequiresDirFlags : int64_t {
470 /// flag undefined.
471 OMP_REQ_UNDEFINED = 0x000,
472 /// no requires clause present.
473 OMP_REQ_NONE = 0x001,
474 /// reverse_offload clause.
475 OMP_REQ_REVERSE_OFFLOAD = 0x002,
476 /// unified_address clause.
477 OMP_REQ_UNIFIED_ADDRESS = 0x004,
478 /// unified_shared_memory clause.
479 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
480 /// dynamic_allocators clause.
481 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
482 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
483 };
484
485 enum OpenMPOffloadingReservedDeviceIDs {
486 /// Device ID if the device was not defined, runtime should get it
487 /// from environment variables in the spec.
488 OMP_DEVICEID_UNDEF = -1,
489 };
490 } // anonymous namespace
491
492 /// Describes ident structure that describes a source location.
493 /// All descriptions are taken from 494 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 495 /// Original structure: 496 /// typedef struct ident { 497 /// kmp_int32 reserved_1; /**< might be used in Fortran; 498 /// see above */ 499 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 500 /// KMP_IDENT_KMPC identifies this union 501 /// member */ 502 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 503 /// see above */ 504 ///#if USE_ITT_BUILD 505 /// /* but currently used for storing 506 /// region-specific ITT */ 507 /// /* contextual information. */ 508 ///#endif /* USE_ITT_BUILD */ 509 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 510 /// C++ */ 511 /// char const *psource; /**< String describing the source location. 512 /// The string is composed of semi-colon separated 513 // fields which describe the source file, 514 /// the function and a pair of line numbers that 515 /// delimit the construct. 516 /// */ 517 /// } ident_t; 518 enum IdentFieldIndex { 519 /// might be used in Fortran 520 IdentField_Reserved_1, 521 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 522 IdentField_Flags, 523 /// Not really used in Fortran any more 524 IdentField_Reserved_2, 525 /// Source[4] in Fortran, do not use for C++ 526 IdentField_Reserved_3, 527 /// String describing the source location. The string is composed of 528 /// semi-colon separated fields which describe the source file, the function 529 /// and a pair of line numbers that delimit the construct. 530 IdentField_PSource 531 }; 532 533 /// Schedule types for 'omp for' loops (these enumerators are taken from 534 /// the enum sched_type in kmp.h). 535 enum OpenMPSchedType { 536 /// Lower bound for default (unordered) versions. 537 OMP_sch_lower = 32, 538 OMP_sch_static_chunked = 33, 539 OMP_sch_static = 34, 540 OMP_sch_dynamic_chunked = 35, 541 OMP_sch_guided_chunked = 36, 542 OMP_sch_runtime = 37, 543 OMP_sch_auto = 38, 544 /// static with chunk adjustment (e.g., simd) 545 OMP_sch_static_balanced_chunked = 45, 546 /// Lower bound for 'ordered' versions. 547 OMP_ord_lower = 64, 548 OMP_ord_static_chunked = 65, 549 OMP_ord_static = 66, 550 OMP_ord_dynamic_chunked = 67, 551 OMP_ord_guided_chunked = 68, 552 OMP_ord_runtime = 69, 553 OMP_ord_auto = 70, 554 OMP_sch_default = OMP_sch_static, 555 /// dist_schedule types 556 OMP_dist_sch_static_chunked = 91, 557 OMP_dist_sch_static = 92, 558 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 559 /// Set if the monotonic schedule modifier was present. 560 OMP_sch_modifier_monotonic = (1 << 29), 561 /// Set if the nonmonotonic schedule modifier was present. 
562 OMP_sch_modifier_nonmonotonic = (1 << 30),
563 };
564
565 enum OpenMPRTLFunction {
566 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
567 /// kmpc_micro microtask, ...);
568 OMPRTL__kmpc_fork_call,
569 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
570 /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
571 OMPRTL__kmpc_threadprivate_cached,
572 /// Call to void __kmpc_threadprivate_register( ident_t *,
573 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
574 OMPRTL__kmpc_threadprivate_register,
575 // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
576 OMPRTL__kmpc_global_thread_num,
577 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
578 // kmp_critical_name *crit);
579 OMPRTL__kmpc_critical,
580 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
581 // global_tid, kmp_critical_name *crit, uintptr_t hint);
582 OMPRTL__kmpc_critical_with_hint,
583 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
584 // kmp_critical_name *crit);
585 OMPRTL__kmpc_end_critical,
586 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
587 // global_tid);
588 OMPRTL__kmpc_cancel_barrier,
589 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
590 OMPRTL__kmpc_barrier,
591 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
592 OMPRTL__kmpc_for_static_fini,
593 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
594 // global_tid);
595 OMPRTL__kmpc_serialized_parallel,
596 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
597 // global_tid);
598 OMPRTL__kmpc_end_serialized_parallel,
599 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
600 // kmp_int32 num_threads);
601 OMPRTL__kmpc_push_num_threads,
602 // Call to void __kmpc_flush(ident_t *loc);
603 OMPRTL__kmpc_flush,
604 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
605 OMPRTL__kmpc_master,
606 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
607 OMPRTL__kmpc_end_master,
608 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
609 // int end_part);
610 OMPRTL__kmpc_omp_taskyield,
611 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
612 OMPRTL__kmpc_single,
613 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
614 OMPRTL__kmpc_end_single,
615 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
616 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
617 // kmp_routine_entry_t *task_entry);
618 OMPRTL__kmpc_omp_task_alloc,
619 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
620 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
621 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
622 // kmp_int64 device_id);
623 OMPRTL__kmpc_omp_target_task_alloc,
624 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
625 // new_task);
626 OMPRTL__kmpc_omp_task,
627 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
628 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
629 // kmp_int32 didit);
630 OMPRTL__kmpc_copyprivate,
631 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
632 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
633 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
634 OMPRTL__kmpc_reduce,
635 // Call to
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 636 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 637 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 638 // *lck); 639 OMPRTL__kmpc_reduce_nowait, 640 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 641 // kmp_critical_name *lck); 642 OMPRTL__kmpc_end_reduce, 643 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 644 // kmp_critical_name *lck); 645 OMPRTL__kmpc_end_reduce_nowait, 646 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 647 // kmp_task_t * new_task); 648 OMPRTL__kmpc_omp_task_begin_if0, 649 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 650 // kmp_task_t * new_task); 651 OMPRTL__kmpc_omp_task_complete_if0, 652 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 653 OMPRTL__kmpc_ordered, 654 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 655 OMPRTL__kmpc_end_ordered, 656 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 657 // global_tid); 658 OMPRTL__kmpc_omp_taskwait, 659 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 660 OMPRTL__kmpc_taskgroup, 661 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 662 OMPRTL__kmpc_end_taskgroup, 663 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 664 // int proc_bind); 665 OMPRTL__kmpc_push_proc_bind, 666 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 667 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 668 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 669 OMPRTL__kmpc_omp_task_with_deps, 670 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 671 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 672 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 673 OMPRTL__kmpc_omp_wait_deps, 674 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 675 // global_tid, kmp_int32 cncl_kind); 676 OMPRTL__kmpc_cancellationpoint, 677 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 678 // kmp_int32 cncl_kind); 679 OMPRTL__kmpc_cancel, 680 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 681 // kmp_int32 num_teams, kmp_int32 thread_limit); 682 OMPRTL__kmpc_push_num_teams, 683 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 684 // microtask, ...); 685 OMPRTL__kmpc_fork_teams, 686 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 687 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 688 // sched, kmp_uint64 grainsize, void *task_dup); 689 OMPRTL__kmpc_taskloop, 690 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 691 // num_dims, struct kmp_dim *dims); 692 OMPRTL__kmpc_doacross_init, 693 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 694 OMPRTL__kmpc_doacross_fini, 695 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 696 // *vec); 697 OMPRTL__kmpc_doacross_post, 698 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 699 // *vec); 700 OMPRTL__kmpc_doacross_wait, 701 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 702 // *data); 703 OMPRTL__kmpc_task_reduction_init, 704 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 705 // *d); 706 
OMPRTL__kmpc_task_reduction_get_th_data,
707 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
708 OMPRTL__kmpc_alloc,
709 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
710 OMPRTL__kmpc_free,
711
712 //
713 // Offloading related calls
714 //
715 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
716 // size);
717 OMPRTL__kmpc_push_target_tripcount,
718 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
719 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
720 // *arg_types);
721 OMPRTL__tgt_target,
722 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
723 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
724 // *arg_types);
725 OMPRTL__tgt_target_nowait,
726 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
727 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
728 // *arg_types, int32_t num_teams, int32_t thread_limit);
729 OMPRTL__tgt_target_teams,
730 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
731 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
732 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
733 OMPRTL__tgt_target_teams_nowait,
734 // Call to void __tgt_register_requires(int64_t flags);
735 OMPRTL__tgt_register_requires,
736 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
737 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
738 OMPRTL__tgt_target_data_begin,
739 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
740 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
741 // *arg_types);
742 OMPRTL__tgt_target_data_begin_nowait,
743 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
744 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
745 OMPRTL__tgt_target_data_end,
746 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
747 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
748 // *arg_types);
749 OMPRTL__tgt_target_data_end_nowait,
750 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
751 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
752 OMPRTL__tgt_target_data_update,
753 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
754 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
755 // *arg_types);
756 OMPRTL__tgt_target_data_update_nowait,
757 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
758 OMPRTL__tgt_mapper_num_components,
759 // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
760 // *base, void *begin, int64_t size, int64_t type);
761 OMPRTL__tgt_push_mapper_component,
762 };
763
764 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
765 /// region.
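/// The wrapped action's Exit hook is pushed as a normal-and-EH cleanup (see
/// RegionCodeGenTy::operator() below), so it runs on both the normal and the
/// exceptional paths out of the region.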
766 class CleanupTy final : public EHScopeStack::Cleanup {
767 PrePostActionTy *Action;
768
769 public:
770 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
771 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
772 if (!CGF.HaveInsertPoint())
773 return;
774 Action->Exit(CGF);
775 }
776 };
777
778 } // anonymous namespace
779
780 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
781 CodeGenFunction::RunCleanupsScope Scope(CGF);
782 if (PrePostAction) {
783 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
784 Callback(CodeGen, CGF, *PrePostAction);
785 } else {
786 PrePostActionTy Action;
787 Callback(CodeGen, CGF, Action);
788 }
789 }
790
791 /// Check if the combiner is a call to a UDR combiner and, if so, return the
792 /// UDR decl used for the reduction.
793 static const OMPDeclareReductionDecl *
794 getReductionInit(const Expr *ReductionOp) {
795 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
796 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
797 if (const auto *DRE =
798 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
799 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
800 return DRD;
801 return nullptr;
802 }
803
804 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
805 const OMPDeclareReductionDecl *DRD,
806 const Expr *InitOp,
807 Address Private, Address Original,
808 QualType Ty) {
809 if (DRD->getInitializer()) {
810 std::pair<llvm::Function *, llvm::Function *> Reduction =
811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
812 const auto *CE = cast<CallExpr>(InitOp);
813 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
814 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
815 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
816 const auto *LHSDRE =
817 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
818 const auto *RHSDRE =
819 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
820 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
821 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
822 [=]() { return Private; });
823 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
824 [=]() { return Original; });
825 (void)PrivateScope.Privatize();
826 RValue Func = RValue::get(Reduction.second);
827 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
828 CGF.EmitIgnoredExpr(InitOp);
829 } else {
830 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
831 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
832 auto *GV = new llvm::GlobalVariable(
833 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
834 llvm::GlobalValue::PrivateLinkage, Init, Name);
835 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
836 RValue InitRVal;
837 switch (CGF.getEvaluationKind(Ty)) {
838 case TEK_Scalar:
839 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
840 break;
841 case TEK_Complex:
842 InitRVal =
843 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
844 break;
845 case TEK_Aggregate:
846 InitRVal = RValue::getAggregate(LV.getAddress(CGF));
847 break;
848 }
849 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
850 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
851 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
852 /*IsInitializer=*/false);
853 }
854 }
855
856 /// Emit initialization of arrays of complex types.
857 /// \param DestAddr Address of the array.
858 /// \param Type Type of array.
859 /// \param Init Initial expression of array. 860 /// \param SrcAddr Address of the original array. 861 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 862 QualType Type, bool EmitDeclareReductionInit, 863 const Expr *Init, 864 const OMPDeclareReductionDecl *DRD, 865 Address SrcAddr = Address::invalid()) { 866 // Perform element-by-element initialization. 867 QualType ElementTy; 868 869 // Drill down to the base element type on both arrays. 870 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 871 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 872 DestAddr = 873 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 874 if (DRD) 875 SrcAddr = 876 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 877 878 llvm::Value *SrcBegin = nullptr; 879 if (DRD) 880 SrcBegin = SrcAddr.getPointer(); 881 llvm::Value *DestBegin = DestAddr.getPointer(); 882 // Cast from pointer to array type to pointer to single element. 883 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 884 // The basic structure here is a while-do loop. 885 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 886 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 887 llvm::Value *IsEmpty = 888 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 889 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 890 891 // Enter the loop body, making that address the current address. 892 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 893 CGF.EmitBlock(BodyBB); 894 895 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 896 897 llvm::PHINode *SrcElementPHI = nullptr; 898 Address SrcElementCurrent = Address::invalid(); 899 if (DRD) { 900 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 901 "omp.arraycpy.srcElementPast"); 902 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 903 SrcElementCurrent = 904 Address(SrcElementPHI, 905 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 906 } 907 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 908 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 909 DestElementPHI->addIncoming(DestBegin, EntryBB); 910 Address DestElementCurrent = 911 Address(DestElementPHI, 912 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 913 914 // Emit copy. 915 { 916 CodeGenFunction::RunCleanupsScope InitScope(CGF); 917 if (EmitDeclareReductionInit) { 918 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 919 SrcElementCurrent, ElementTy); 920 } else 921 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 922 /*IsInitializer=*/false); 923 } 924 925 if (DRD) { 926 // Shift the address forward by one element. 927 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 928 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 929 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 930 } 931 932 // Shift the address forward by one element. 933 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 934 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 935 // Check whether we've reached the end. 936 llvm::Value *Done = 937 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 938 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 939 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 940 941 // Done. 
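// In summary, the loop built above has roughly this shape (an illustrative
// sketch, not literal IR):
//   entry:               br (dest.begin == dest.end), done, body
//   omp.arrayinit.body:  PHI over the current element(s); emit one element
//                        init; advance the pointer(s); br (next == end),
//                        done, body
//   omp.arrayinit.done: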
942 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 943 } 944 945 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 946 return CGF.EmitOMPSharedLValue(E); 947 } 948 949 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 950 const Expr *E) { 951 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 952 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 953 return LValue(); 954 } 955 956 void ReductionCodeGen::emitAggregateInitialization( 957 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 958 const OMPDeclareReductionDecl *DRD) { 959 // Emit VarDecl with copy init for arrays. 960 // Get the address of the original variable captured in current 961 // captured region. 962 const auto *PrivateVD = 963 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 964 bool EmitDeclareReductionInit = 965 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 966 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 967 EmitDeclareReductionInit, 968 EmitDeclareReductionInit ? ClausesData[N].ReductionOp 969 : PrivateVD->getInit(), 970 DRD, SharedLVal.getAddress(CGF)); 971 } 972 973 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 974 ArrayRef<const Expr *> Privates, 975 ArrayRef<const Expr *> ReductionOps) { 976 ClausesData.reserve(Shareds.size()); 977 SharedAddresses.reserve(Shareds.size()); 978 Sizes.reserve(Shareds.size()); 979 BaseDecls.reserve(Shareds.size()); 980 auto IPriv = Privates.begin(); 981 auto IRed = ReductionOps.begin(); 982 for (const Expr *Ref : Shareds) { 983 ClausesData.emplace_back(Ref, *IPriv, *IRed); 984 std::advance(IPriv, 1); 985 std::advance(IRed, 1); 986 } 987 } 988 989 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 990 assert(SharedAddresses.size() == N && 991 "Number of generated lvalues must be exactly N."); 992 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 993 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 994 SharedAddresses.emplace_back(First, Second); 995 } 996 997 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 998 const auto *PrivateVD = 999 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1000 QualType PrivateType = PrivateVD->getType(); 1001 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1002 if (!PrivateType->isVariablyModifiedType()) { 1003 Sizes.emplace_back( 1004 CGF.getTypeSize( 1005 SharedAddresses[N].first.getType().getNonReferenceType()), 1006 nullptr); 1007 return; 1008 } 1009 llvm::Value *Size; 1010 llvm::Value *SizeInChars; 1011 auto *ElemType = cast<llvm::PointerType>( 1012 SharedAddresses[N].first.getPointer(CGF)->getType()) 1013 ->getElementType(); 1014 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1015 if (AsArraySection) { 1016 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), 1017 SharedAddresses[N].first.getPointer(CGF)); 1018 Size = CGF.Builder.CreateNUWAdd( 1019 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1020 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1021 } else { 1022 SizeInChars = CGF.getTypeSize( 1023 SharedAddresses[N].first.getType().getNonReferenceType()); 1024 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1025 } 1026 Sizes.emplace_back(SizeInChars, Size); 1027 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1028 CGF, 1029 cast<OpaqueValueExpr>( 1030 
CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1031 RValue::get(Size)); 1032 CGF.EmitVariablyModifiedType(PrivateType); 1033 } 1034 1035 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1036 llvm::Value *Size) { 1037 const auto *PrivateVD = 1038 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1039 QualType PrivateType = PrivateVD->getType(); 1040 if (!PrivateType->isVariablyModifiedType()) { 1041 assert(!Size && !Sizes[N].second && 1042 "Size should be nullptr for non-variably modified reduction " 1043 "items."); 1044 return; 1045 } 1046 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1047 CGF, 1048 cast<OpaqueValueExpr>( 1049 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1050 RValue::get(Size)); 1051 CGF.EmitVariablyModifiedType(PrivateType); 1052 } 1053 1054 void ReductionCodeGen::emitInitialization( 1055 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1056 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1057 assert(SharedAddresses.size() > N && "No variable was generated"); 1058 const auto *PrivateVD = 1059 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1060 const OMPDeclareReductionDecl *DRD = 1061 getReductionInit(ClausesData[N].ReductionOp); 1062 QualType PrivateType = PrivateVD->getType(); 1063 PrivateAddr = CGF.Builder.CreateElementBitCast( 1064 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1065 QualType SharedType = SharedAddresses[N].first.getType(); 1066 SharedLVal = CGF.MakeAddrLValue( 1067 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1068 CGF.ConvertTypeForMem(SharedType)), 1069 SharedType, SharedAddresses[N].first.getBaseInfo(), 1070 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1071 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1072 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1073 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1074 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1075 PrivateAddr, SharedLVal.getAddress(CGF), 1076 SharedLVal.getType()); 1077 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1078 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1079 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1080 PrivateVD->getType().getQualifiers(), 1081 /*IsInitializer=*/false); 1082 } 1083 } 1084 1085 bool ReductionCodeGen::needCleanups(unsigned N) { 1086 const auto *PrivateVD = 1087 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1088 QualType PrivateType = PrivateVD->getType(); 1089 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1090 return DTorKind != QualType::DK_none; 1091 } 1092 1093 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1094 Address PrivateAddr) { 1095 const auto *PrivateVD = 1096 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1097 QualType PrivateType = PrivateVD->getType(); 1098 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1099 if (needCleanups(N)) { 1100 PrivateAddr = CGF.Builder.CreateElementBitCast( 1101 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1102 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1103 } 1104 } 1105 1106 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1107 LValue BaseLV) { 1108 BaseTy = BaseTy.getNonReferenceType(); 1109 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 
1110 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1111 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1112 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1113 } else { 1114 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1115 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1116 } 1117 BaseTy = BaseTy->getPointeeType(); 1118 } 1119 return CGF.MakeAddrLValue( 1120 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1121 CGF.ConvertTypeForMem(ElTy)), 1122 BaseLV.getType(), BaseLV.getBaseInfo(), 1123 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1124 } 1125 1126 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1127 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1128 llvm::Value *Addr) { 1129 Address Tmp = Address::invalid(); 1130 Address TopTmp = Address::invalid(); 1131 Address MostTopTmp = Address::invalid(); 1132 BaseTy = BaseTy.getNonReferenceType(); 1133 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1134 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1135 Tmp = CGF.CreateMemTemp(BaseTy); 1136 if (TopTmp.isValid()) 1137 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1138 else 1139 MostTopTmp = Tmp; 1140 TopTmp = Tmp; 1141 BaseTy = BaseTy->getPointeeType(); 1142 } 1143 llvm::Type *Ty = BaseLVType; 1144 if (Tmp.isValid()) 1145 Ty = Tmp.getElementType(); 1146 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1147 if (Tmp.isValid()) { 1148 CGF.Builder.CreateStore(Addr, Tmp); 1149 return MostTopTmp; 1150 } 1151 return Address(Addr, BaseLVAlignment); 1152 } 1153 1154 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1155 const VarDecl *OrigVD = nullptr; 1156 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1157 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1158 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1159 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1160 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1161 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1162 DE = cast<DeclRefExpr>(Base); 1163 OrigVD = cast<VarDecl>(DE->getDecl()); 1164 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1165 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1166 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1167 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1168 DE = cast<DeclRefExpr>(Base); 1169 OrigVD = cast<VarDecl>(DE->getDecl()); 1170 } 1171 return OrigVD; 1172 } 1173 1174 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1175 Address PrivateAddr) { 1176 const DeclRefExpr *DE; 1177 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1178 BaseDecls.emplace_back(OrigVD); 1179 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1180 LValue BaseLValue = 1181 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1182 OriginalBaseLValue); 1183 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1184 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1185 llvm::Value *PrivatePointer = 1186 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1187 PrivateAddr.getPointer(), 1188 SharedAddresses[N].first.getAddress(CGF).getType()); 1189 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1190 return castToBase(CGF, OrigVD->getType(), 1191 SharedAddresses[N].first.getType(), 1192 OriginalBaseLValue.getAddress(CGF).getType(), 1193 
OriginalBaseLValue.getAlignment(), Ptr);
1194 }
1195 BaseDecls.emplace_back(
1196 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1197 return PrivateAddr;
1198 }
1199
1200 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1201 const OMPDeclareReductionDecl *DRD =
1202 getReductionInit(ClausesData[N].ReductionOp);
1203 return DRD && DRD->getInitializer();
1204 }
1205
1206 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1207 return CGF.EmitLoadOfPointerLValue(
1208 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1209 getThreadIDVariable()->getType()->castAs<PointerType>());
1210 }
1211
1212 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1213 if (!CGF.HaveInsertPoint())
1214 return;
1215 // 1.2.2 OpenMP Language Terminology
1216 // Structured block - An executable statement with a single entry at the
1217 // top and a single exit at the bottom.
1218 // The point of exit cannot be a branch out of the structured block.
1219 // longjmp() and throw() must not violate the entry/exit criteria.
1220 CGF.EHStack.pushTerminate();
1221 CodeGen(CGF);
1222 CGF.EHStack.popTerminate();
1223 }
1224
1225 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1226 CodeGenFunction &CGF) {
1227 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1228 getThreadIDVariable()->getType(),
1229 AlignmentSource::Decl);
1230 }
1231
1232 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1233 QualType FieldTy) {
1234 auto *Field = FieldDecl::Create(
1235 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1236 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1237 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238 Field->setAccess(AS_public);
1239 DC->addDecl(Field);
1240 return Field;
1241 }
1242
1243 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1244 StringRef Separator)
1245 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1246 OffloadEntriesInfoManager(CGM) {
1247 ASTContext &C = CGM.getContext();
1248 RecordDecl *RD = C.buildImplicitRecord("ident_t");
1249 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1250 RD->startDefinition();
1251 // reserved_1
1252 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253 // flags
1254 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255 // reserved_2
1256 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257 // reserved_3
1258 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1259 // psource
1260 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1261 RD->completeDefinition();
1262 IdentQTy = C.getRecordType(RD);
1263 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1264 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1265
1266 loadOffloadInfoMetadata();
1267 }
1268
1269 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
1270 const GlobalDecl &OldGD,
1271 llvm::GlobalValue *OrigAddr,
1272 bool IsForDefinition) {
1273 // Emit at least a definition for the aliasee if the address of the
1274 // original function is requested.
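// For example (hypothetical source, for illustration only):
//   int foo_variant();
//   #pragma omp declare variant(foo_variant) match(implementation={vendor(llvm)})
//   int foo();
// Here NewGD names the variant (foo_variant) and OldGD the base function
// (foo); the code below emits an alias carrying OldGD's mangled name that
// points at the variant's definition, so calls to the base name resolve to
// the variant.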
1275 if (IsForDefinition || OrigAddr) 1276 (void)CGM.GetAddrOfGlobal(NewGD); 1277 StringRef NewMangledName = CGM.getMangledName(NewGD); 1278 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1279 if (Addr && !Addr->isDeclaration()) { 1280 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1281 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); 1282 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1283 1284 // Create a reference to the named value. This ensures that it is emitted 1285 // if a deferred decl. 1286 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1287 1288 // Create the new alias itself, but don't set a name yet. 1289 auto *GA = 1290 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1291 1292 if (OrigAddr) { 1293 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1294 1295 GA->takeName(OrigAddr); 1296 OrigAddr->replaceAllUsesWith( 1297 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1298 OrigAddr->eraseFromParent(); 1299 } else { 1300 GA->setName(CGM.getMangledName(OldGD)); 1301 } 1302 1303 // Set attributes which are particular to an alias; this is a 1304 // specialization of the attributes which may be set on a global function. 1305 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1306 D->isWeakImported()) 1307 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1308 1309 CGM.SetCommonAttributes(OldGD, GA); 1310 return true; 1311 } 1312 return false; 1313 } 1314 1315 void CGOpenMPRuntime::clear() { 1316 InternalVars.clear(); 1317 // Clean non-target variable declarations possibly used only in debug info. 1318 for (const auto &Data : EmittedNonTargetVariables) { 1319 if (!Data.getValue().pointsToAliveValue()) 1320 continue; 1321 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1322 if (!GV) 1323 continue; 1324 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1325 continue; 1326 GV->eraseFromParent(); 1327 } 1328 // Emit aliases for the deferred aliasees. 1329 for (const auto &Pair : DeferredVariantFunction) { 1330 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1331 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1332 // If not able to emit alias, just emit original declaration. 
1333 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1334 /*IsForDefinition=*/false); 1335 } 1336 } 1337 1338 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1339 SmallString<128> Buffer; 1340 llvm::raw_svector_ostream OS(Buffer); 1341 StringRef Sep = FirstSeparator; 1342 for (StringRef Part : Parts) { 1343 OS << Sep << Part; 1344 Sep = Separator; 1345 } 1346 return OS.str(); 1347 } 1348 1349 static llvm::Function * 1350 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1351 const Expr *CombinerInitializer, const VarDecl *In, 1352 const VarDecl *Out, bool IsCombiner) { 1353 // void .omp_combiner.(Ty *in, Ty *out); 1354 ASTContext &C = CGM.getContext(); 1355 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1356 FunctionArgList Args; 1357 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1358 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1359 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1360 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1361 Args.push_back(&OmpOutParm); 1362 Args.push_back(&OmpInParm); 1363 const CGFunctionInfo &FnInfo = 1364 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1365 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1366 std::string Name = CGM.getOpenMPRuntime().getName( 1367 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1368 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1369 Name, &CGM.getModule()); 1370 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1371 if (CGM.getLangOpts().Optimize) { 1372 Fn->removeFnAttr(llvm::Attribute::NoInline); 1373 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1374 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1375 } 1376 CodeGenFunction CGF(CGM); 1377 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1378 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
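// For instance (hypothetical directive, for illustration only):
//   #pragma omp declare reduction(mymin : int : omp_out = omp_in < omp_out ? omp_in : omp_out)
// produces a combiner roughly equivalent to
//   void .omp_combiner.(int *omp_out_parm, int *omp_in_parm) {
//     *omp_out_parm = *omp_in_parm < *omp_out_parm ? *omp_in_parm : *omp_out_parm;
//   }
// where the privatization below redirects omp_in/omp_out to those parameters.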
1379 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1380 Out->getLocation());
1381 CodeGenFunction::OMPPrivateScope Scope(CGF);
1382 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1383 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1384 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1385 .getAddress(CGF);
1386 });
1387 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1388 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1389 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1390 .getAddress(CGF);
1391 });
1392 (void)Scope.Privatize();
1393 if (!IsCombiner && Out->hasInit() &&
1394 !CGF.isTrivialInitializer(Out->getInit())) {
1395 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1396 Out->getType().getQualifiers(),
1397 /*IsInitializer=*/true);
1398 }
1399 if (CombinerInitializer)
1400 CGF.EmitIgnoredExpr(CombinerInitializer);
1401 Scope.ForceCleanup();
1402 CGF.FinishFunction();
1403 return Fn;
1404 }
1405
1406 void CGOpenMPRuntime::emitUserDefinedReduction(
1407 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1408 if (UDRMap.count(D) > 0)
1409 return;
1410 llvm::Function *Combiner = emitCombinerOrInitializer(
1411 CGM, D->getType(), D->getCombiner(),
1412 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1413 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1414 /*IsCombiner=*/true);
1415 llvm::Function *Initializer = nullptr;
1416 if (const Expr *Init = D->getInitializer()) {
1417 Initializer = emitCombinerOrInitializer(
1418 CGM, D->getType(),
1419 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1420 : nullptr,
1421 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1422 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1423 /*IsCombiner=*/false);
1424 }
1425 UDRMap.try_emplace(D, Combiner, Initializer);
1426 if (CGF) {
1427 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1428 Decls.second.push_back(D);
1429 }
1430 }
1431
1432 std::pair<llvm::Function *, llvm::Function *>
1433 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1434 auto I = UDRMap.find(D);
1435 if (I != UDRMap.end())
1436 return I->second;
1437 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1438 return UDRMap.lookup(D);
1439 }
1440
1441 namespace {
1442 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1443 // Builder if one is present.
1444 struct PushAndPopStackRAII {
1445 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1446 bool HasCancel)
1447 : OMPBuilder(OMPBuilder) {
1448 if (!OMPBuilder)
1449 return;
1450
1451 // The following callback is the crucial part of clang's cleanup process.
1452 //
1453 // NOTE:
1454 // Once the OpenMPIRBuilder is used to create parallel regions (and
1455 // similar), the cancellation destination (Dest below) is determined via
1456 // IP. That means if we have variables to finalize, we split the block at IP,
1457 // use the new block (=BB) as the destination to build a JumpDest (via
1458 // getJumpDestInCurrentScope(BB)), which is then fed to
1459 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1460 // to push & pop a FinalizationInfo object.
1461 // The FiniCB will still be needed, but at the point where the
1462 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should leave the block non-terminated!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder, as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
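  // For instance (assumed user code), a region such as
  //
  //   #pragma omp parallel
  //   {
  //     if (err)
  //       #pragma omp cancel parallel
  //   }
  //
  // has HasCancel set, and the FinalizationInfo pushed by the
  // PushAndPopStackRAII below lets the cancellation point branch through any
  // pending cleanups.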
1514 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1515 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1516 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1517 HasCancel, OutlinedHelperName); 1518 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1519 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1520 } 1521 1522 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1523 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1524 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1525 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1526 return emitParallelOrTeamsOutlinedFunction( 1527 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1528 } 1529 1530 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1531 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1532 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1533 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1534 return emitParallelOrTeamsOutlinedFunction( 1535 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1536 } 1537 1538 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1539 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1540 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1541 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1542 bool Tied, unsigned &NumberOfParts) { 1543 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1544 PrePostActionTy &) { 1545 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1546 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1547 llvm::Value *TaskArgs[] = { 1548 UpLoc, ThreadID, 1549 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1550 TaskTVar->getType()->castAs<PointerType>()) 1551 .getPointer(CGF)}; 1552 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1553 }; 1554 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1555 UntiedCodeGen); 1556 CodeGen.setAction(Action); 1557 assert(!ThreadIDVar->getType()->isPointerType() && 1558 "thread id variable must be of type kmp_int32 for tasks"); 1559 const OpenMPDirectiveKind Region = 1560 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1561 : OMPD_task; 1562 const CapturedStmt *CS = D.getCapturedStmt(Region); 1563 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1564 CodeGenFunction CGF(CGM, true); 1565 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1566 InnermostKind, 1567 TD ? TD->hasCancel() : false, Action); 1568 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1569 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1570 if (!Tied) 1571 NumberOfParts = Action.getNumberOfParts(); 1572 return Res; 1573 } 1574 1575 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1576 const RecordDecl *RD, const CGRecordLayout &RL, 1577 ArrayRef<llvm::Constant *> Data) { 1578 llvm::StructType *StructTy = RL.getLLVMType(); 1579 unsigned PrevIdx = 0; 1580 ConstantInitBuilder CIBuilder(CGM); 1581 auto DI = Data.begin(); 1582 for (const FieldDecl *FD : RD->fields()) { 1583 unsigned Idx = RL.getLLVMFieldNo(FD); 1584 // Fill the alignment. 
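    // For example (an assumed layout, for illustration only): if the record
    // lowers to { i32, [4 x i8], double } and the AST fields map to LLVM
    // field indices 0 and 2, then index 1 is a padding field and receives a
    // zero-initialized constant from the loop below.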
1585 for (unsigned I = PrevIdx; I < Idx; ++I) 1586 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1587 PrevIdx = Idx + 1; 1588 Fields.add(*DI); 1589 ++DI; 1590 } 1591 } 1592 1593 template <class... As> 1594 static llvm::GlobalVariable * 1595 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1596 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1597 As &&... Args) { 1598 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1599 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1600 ConstantInitBuilder CIBuilder(CGM); 1601 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1602 buildStructValue(Fields, CGM, RD, RL, Data); 1603 return Fields.finishAndCreateGlobal( 1604 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1605 std::forward<As>(Args)...); 1606 } 1607 1608 template <typename T> 1609 static void 1610 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1611 ArrayRef<llvm::Constant *> Data, 1612 T &Parent) { 1613 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1614 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1615 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1616 buildStructValue(Fields, CGM, RD, RL, Data); 1617 Fields.finishAndAddTo(Parent); 1618 } 1619 1620 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1621 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1622 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1623 FlagsTy FlagsKey(Flags, Reserved2Flags); 1624 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1625 if (!Entry) { 1626 if (!DefaultOpenMPPSource) { 1627 // Initialize default location for psource field of ident_t structure of 1628 // all ident_t objects. Format is ";file;function;line;column;;". 
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated, return the global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
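  // For example (assumed values): with debug info enabled, a directive at
  // line 10, column 3 of "file.c" inside function "foo" ends up with
  //
  //   *psource == ";file.c;foo;10;3;;"
  //
  // stored into the ".kmpc_loc.addr" temporary that is built below.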
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build the debug location.
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with a thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value is loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region; we need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_call are passed to the
        //   callback callee.
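        // The resulting declaration then looks roughly like this in IR
        // (shape shown for illustration; the exact metadata ids vary):
        //
        //   declare !callback !0 void @__kmpc_fork_call(...)
        //   !0 = !{!1}
        //   !1 = !{i64 2, i64 -1, i64 -1, i1 true}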
1849 F->addMetadata( 1850 llvm::LLVMContext::MD_callback, 1851 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1852 2, {-1, -1}, 1853 /* VarArgsArePassed */ true)})); 1854 } 1855 } 1856 break; 1857 } 1858 case OMPRTL__kmpc_global_thread_num: { 1859 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1860 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1861 auto *FnTy = 1862 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1863 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1864 break; 1865 } 1866 case OMPRTL__kmpc_threadprivate_cached: { 1867 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1868 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1869 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1870 CGM.VoidPtrTy, CGM.SizeTy, 1871 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1872 auto *FnTy = 1873 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1875 break; 1876 } 1877 case OMPRTL__kmpc_critical: { 1878 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1879 // kmp_critical_name *crit); 1880 llvm::Type *TypeParams[] = { 1881 getIdentTyPointerTy(), CGM.Int32Ty, 1882 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1883 auto *FnTy = 1884 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1885 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1886 break; 1887 } 1888 case OMPRTL__kmpc_critical_with_hint: { 1889 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1890 // kmp_critical_name *crit, uintptr_t hint); 1891 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1892 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1893 CGM.IntPtrTy}; 1894 auto *FnTy = 1895 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1896 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1897 break; 1898 } 1899 case OMPRTL__kmpc_threadprivate_register: { 1900 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1901 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1902 // typedef void *(*kmpc_ctor)(void *); 1903 auto *KmpcCtorTy = 1904 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1905 /*isVarArg*/ false)->getPointerTo(); 1906 // typedef void *(*kmpc_cctor)(void *, void *); 1907 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1908 auto *KmpcCopyCtorTy = 1909 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1910 /*isVarArg*/ false) 1911 ->getPointerTo(); 1912 // typedef void (*kmpc_dtor)(void *); 1913 auto *KmpcDtorTy = 1914 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1915 ->getPointerTo(); 1916 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1917 KmpcCopyCtorTy, KmpcDtorTy}; 1918 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1919 /*isVarArg*/ false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_end_critical: { 1924 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1925 // kmp_critical_name *crit); 1926 llvm::Type *TypeParams[] = { 1927 getIdentTyPointerTy(), CGM.Int32Ty, 1928 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1929 auto *FnTy = 1930 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1931 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 
1932 break; 1933 } 1934 case OMPRTL__kmpc_cancel_barrier: { 1935 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1936 // global_tid); 1937 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1938 auto *FnTy = 1939 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1941 break; 1942 } 1943 case OMPRTL__kmpc_barrier: { 1944 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1945 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1946 auto *FnTy = 1947 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_for_static_fini: { 1952 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1953 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1954 auto *FnTy = 1955 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1957 break; 1958 } 1959 case OMPRTL__kmpc_push_num_threads: { 1960 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1961 // kmp_int32 num_threads) 1962 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1963 CGM.Int32Ty}; 1964 auto *FnTy = 1965 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1966 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1967 break; 1968 } 1969 case OMPRTL__kmpc_serialized_parallel: { 1970 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1971 // global_tid); 1972 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1973 auto *FnTy = 1974 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1975 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1976 break; 1977 } 1978 case OMPRTL__kmpc_end_serialized_parallel: { 1979 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1980 // global_tid); 1981 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1982 auto *FnTy = 1983 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1984 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1985 break; 1986 } 1987 case OMPRTL__kmpc_flush: { 1988 // Build void __kmpc_flush(ident_t *loc); 1989 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1990 auto *FnTy = 1991 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1992 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1993 break; 1994 } 1995 case OMPRTL__kmpc_master: { 1996 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1997 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1998 auto *FnTy = 1999 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2000 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2001 break; 2002 } 2003 case OMPRTL__kmpc_end_master: { 2004 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2005 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2006 auto *FnTy = 2007 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2009 break; 2010 } 2011 case OMPRTL__kmpc_omp_taskyield: { 2012 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2013 // int end_part); 
2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2015 auto *FnTy = 2016 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2017 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2018 break; 2019 } 2020 case OMPRTL__kmpc_single: { 2021 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2022 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2023 auto *FnTy = 2024 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2025 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2026 break; 2027 } 2028 case OMPRTL__kmpc_end_single: { 2029 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2030 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_omp_task_alloc: { 2037 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2038 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2039 // kmp_routine_entry_t *task_entry); 2040 assert(KmpRoutineEntryPtrTy != nullptr && 2041 "Type kmp_routine_entry_t must be created."); 2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2043 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2044 // Return void * and then cast to particular kmp_task_t type. 2045 auto *FnTy = 2046 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2047 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2048 break; 2049 } 2050 case OMPRTL__kmpc_omp_target_task_alloc: { 2051 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2052 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2053 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2054 assert(KmpRoutineEntryPtrTy != nullptr && 2055 "Type kmp_routine_entry_t must be created."); 2056 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2057 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2058 CGM.Int64Ty}; 2059 // Return void * and then cast to particular kmp_task_t type. 
2060 auto *FnTy = 2061 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2062 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2063 break; 2064 } 2065 case OMPRTL__kmpc_omp_task: { 2066 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2067 // *new_task); 2068 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2069 CGM.VoidPtrTy}; 2070 auto *FnTy = 2071 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2072 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2073 break; 2074 } 2075 case OMPRTL__kmpc_copyprivate: { 2076 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2077 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2078 // kmp_int32 didit); 2079 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2080 auto *CpyFnTy = 2081 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2083 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2084 CGM.Int32Ty}; 2085 auto *FnTy = 2086 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2087 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2088 break; 2089 } 2090 case OMPRTL__kmpc_reduce: { 2091 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2092 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2093 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2094 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2095 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2096 /*isVarArg=*/false); 2097 llvm::Type *TypeParams[] = { 2098 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2099 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2100 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2101 auto *FnTy = 2102 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2103 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2104 break; 2105 } 2106 case OMPRTL__kmpc_reduce_nowait: { 2107 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2108 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2109 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2110 // *lck); 2111 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2112 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2113 /*isVarArg=*/false); 2114 llvm::Type *TypeParams[] = { 2115 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2116 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2117 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2118 auto *FnTy = 2119 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2120 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2121 break; 2122 } 2123 case OMPRTL__kmpc_end_reduce: { 2124 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2125 // kmp_critical_name *lck); 2126 llvm::Type *TypeParams[] = { 2127 getIdentTyPointerTy(), CGM.Int32Ty, 2128 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2129 auto *FnTy = 2130 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2131 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2132 break; 2133 } 2134 case OMPRTL__kmpc_end_reduce_nowait: { 2135 // Build __kmpc_end_reduce_nowait(ident_t *loc, 
    // kmp_int32 global_tid, kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty,           CGM.VoidPtrTy,
                                CGM.Int32Ty,           CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_teams:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_teams are passed to the
        //   callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
    break;
  }
  case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
    break;
  }
  case OMPRTL__kmpc_push_target_tripcount: {
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
    // size);
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty,
TypeParams, /*isVarArg*/ false); 2468 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2469 break; 2470 } 2471 case OMPRTL__tgt_register_requires: { 2472 // Build void __tgt_register_requires(int64_t flags); 2473 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2474 auto *FnTy = 2475 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2477 break; 2478 } 2479 case OMPRTL__tgt_target_data_begin: { 2480 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2481 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2482 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2483 CGM.Int32Ty, 2484 CGM.VoidPtrPtrTy, 2485 CGM.VoidPtrPtrTy, 2486 CGM.Int64Ty->getPointerTo(), 2487 CGM.Int64Ty->getPointerTo()}; 2488 auto *FnTy = 2489 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2490 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2491 break; 2492 } 2493 case OMPRTL__tgt_target_data_begin_nowait: { 2494 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2495 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2496 // *arg_types); 2497 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2498 CGM.Int32Ty, 2499 CGM.VoidPtrPtrTy, 2500 CGM.VoidPtrPtrTy, 2501 CGM.Int64Ty->getPointerTo(), 2502 CGM.Int64Ty->getPointerTo()}; 2503 auto *FnTy = 2504 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2505 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2506 break; 2507 } 2508 case OMPRTL__tgt_target_data_end: { 2509 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2510 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2511 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2512 CGM.Int32Ty, 2513 CGM.VoidPtrPtrTy, 2514 CGM.VoidPtrPtrTy, 2515 CGM.Int64Ty->getPointerTo(), 2516 CGM.Int64Ty->getPointerTo()}; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2519 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2520 break; 2521 } 2522 case OMPRTL__tgt_target_data_end_nowait: { 2523 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2524 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2525 // *arg_types); 2526 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2527 CGM.Int32Ty, 2528 CGM.VoidPtrPtrTy, 2529 CGM.VoidPtrPtrTy, 2530 CGM.Int64Ty->getPointerTo(), 2531 CGM.Int64Ty->getPointerTo()}; 2532 auto *FnTy = 2533 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2534 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2535 break; 2536 } 2537 case OMPRTL__tgt_target_data_update: { 2538 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2539 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2540 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2541 CGM.Int32Ty, 2542 CGM.VoidPtrPtrTy, 2543 CGM.VoidPtrPtrTy, 2544 CGM.Int64Ty->getPointerTo(), 2545 CGM.Int64Ty->getPointerTo()}; 2546 auto *FnTy = 2547 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2549 break; 2550 } 2551 case OMPRTL__tgt_target_data_update_nowait: { 2552 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2553 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2554 // 
*arg_types); 2555 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2556 CGM.Int32Ty, 2557 CGM.VoidPtrPtrTy, 2558 CGM.VoidPtrPtrTy, 2559 CGM.Int64Ty->getPointerTo(), 2560 CGM.Int64Ty->getPointerTo()}; 2561 auto *FnTy = 2562 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2563 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2564 break; 2565 } 2566 case OMPRTL__tgt_mapper_num_components: { 2567 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2568 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2569 auto *FnTy = 2570 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2572 break; 2573 } 2574 case OMPRTL__tgt_push_mapper_component: { 2575 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2576 // *base, void *begin, int64_t size, int64_t type); 2577 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2578 CGM.Int64Ty, CGM.Int64Ty}; 2579 auto *FnTy = 2580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2581 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2582 break; 2583 } 2584 } 2585 assert(RTLFn && "Unable to find OpenMP runtime function"); 2586 return RTLFn; 2587 } 2588 2589 llvm::FunctionCallee 2590 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2591 assert((IVSize == 32 || IVSize == 64) && 2592 "IV size is not compatible with the omp runtime"); 2593 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2594 : "__kmpc_for_static_init_4u") 2595 : (IVSigned ? "__kmpc_for_static_init_8" 2596 : "__kmpc_for_static_init_8u"); 2597 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2598 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2599 llvm::Type *TypeParams[] = { 2600 getIdentTyPointerTy(), // loc 2601 CGM.Int32Ty, // tid 2602 CGM.Int32Ty, // schedtype 2603 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2604 PtrTy, // p_lower 2605 PtrTy, // p_upper 2606 PtrTy, // p_stride 2607 ITy, // incr 2608 ITy // chunk 2609 }; 2610 auto *FnTy = 2611 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2612 return CGM.CreateRuntimeFunction(FnTy, Name); 2613 } 2614 2615 llvm::FunctionCallee 2616 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2617 assert((IVSize == 32 || IVSize == 64) && 2618 "IV size is not compatible with the omp runtime"); 2619 StringRef Name = 2620 IVSize == 32 2621 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2622 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2623 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2624 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2625 CGM.Int32Ty, // tid 2626 CGM.Int32Ty, // schedtype 2627 ITy, // lower 2628 ITy, // upper 2629 ITy, // stride 2630 ITy // chunk 2631 }; 2632 auto *FnTy = 2633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2634 return CGM.CreateRuntimeFunction(FnTy, Name); 2635 } 2636 2637 llvm::FunctionCallee 2638 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2639 assert((IVSize == 32 || IVSize == 64) && 2640 "IV size is not compatible with the omp runtime"); 2641 StringRef Name = 2642 IVSize == 32 2643 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2644 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2645 llvm::Type *TypeParams[] = { 2646 getIdentTyPointerTy(), // loc 2647 CGM.Int32Ty, // tid 2648 }; 2649 auto *FnTy = 2650 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2651 return CGM.CreateRuntimeFunction(FnTy, Name); 2652 } 2653 2654 llvm::FunctionCallee 2655 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2656 assert((IVSize == 32 || IVSize == 64) && 2657 "IV size is not compatible with the omp runtime"); 2658 StringRef Name = 2659 IVSize == 32 2660 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2661 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2662 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2663 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2664 llvm::Type *TypeParams[] = { 2665 getIdentTyPointerTy(), // loc 2666 CGM.Int32Ty, // tid 2667 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2668 PtrTy, // p_lower 2669 PtrTy, // p_upper 2670 PtrTy // p_stride 2671 }; 2672 auto *FnTy = 2673 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2674 return CGM.CreateRuntimeFunction(FnTy, Name); 2675 } 2676 2677 /// Obtain information that uniquely identifies a target entry. This 2678 /// consists of the file and device IDs as well as line number associated with 2679 /// the relevant entry source location. 2680 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2681 unsigned &DeviceID, unsigned &FileID, 2682 unsigned &LineNum) { 2683 SourceManager &SM = C.getSourceManager(); 2684 2685 // The loc should be always valid and have a file ID (the user cannot use 2686 // #pragma directives in macros) 2687 2688 assert(Loc.isValid() && "Source location is expected to be always valid."); 2689 2690 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2691 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2692 2693 llvm::sys::fs::UniqueID ID; 2694 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2695 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2696 << PLoc.getFilename() << EC.message(); 2697 2698 DeviceID = ID.getDevice(); 2699 FileID = ID.getFile(); 2700 LineNum = PLoc.getLine(); 2701 } 2702 2703 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2704 if (CGM.getLangOpts().OpenMPSimd) 2705 return Address::invalid(); 2706 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2707 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2708 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2709 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2710 HasRequiresUnifiedSharedMemory))) { 2711 SmallString<64> PtrName; 2712 { 2713 llvm::raw_svector_ostream OS(PtrName); 2714 OS << CGM.getMangledName(GlobalDecl(VD)); 2715 if (!VD->isExternallyVisible()) { 2716 unsigned DeviceID, FileID, Line; 2717 getTargetEntryUniqueInfo(CGM.getContext(), 2718 VD->getCanonicalDecl()->getBeginLoc(), 2719 DeviceID, FileID, Line); 2720 OS << llvm::format("_%x", FileID); 2721 } 2722 OS << "_decl_tgt_ref_ptr"; 2723 } 2724 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2725 if (!Ptr) { 2726 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2727 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2728 PtrName); 2729 2730 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2731 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2732 2733 if (!CGM.getLangOpts().OpenMPIsDevice) 2734 
GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2735 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2736 } 2737 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2738 } 2739 return Address::invalid(); 2740 } 2741 2742 llvm::Constant * 2743 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2744 assert(!CGM.getLangOpts().OpenMPUseTLS || 2745 !CGM.getContext().getTargetInfo().isTLSSupported()); 2746 // Lookup the entry, lazily creating it if necessary. 2747 std::string Suffix = getName({"cache", ""}); 2748 return getOrCreateInternalVariable( 2749 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2750 } 2751 2752 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2753 const VarDecl *VD, 2754 Address VDAddr, 2755 SourceLocation Loc) { 2756 if (CGM.getLangOpts().OpenMPUseTLS && 2757 CGM.getContext().getTargetInfo().isTLSSupported()) 2758 return VDAddr; 2759 2760 llvm::Type *VarTy = VDAddr.getElementType(); 2761 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2762 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2763 CGM.Int8PtrTy), 2764 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2765 getOrCreateThreadPrivateCache(VD)}; 2766 return Address(CGF.EmitRuntimeCall( 2767 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2768 VDAddr.getAlignment()); 2769 } 2770 2771 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2772 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2773 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2774 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2775 // library. 2776 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2777 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2778 OMPLoc); 2779 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2780 // to register constructor/destructor for variable. 
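  // For example (assumed source), for
  //
  //   static int x;
  //   #pragma omp threadprivate(x)
  //
  // with a non-trivial initializer, the calls emitted below look roughly like
  //
  //   call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
  //   call void @__kmpc_threadprivate_register(%struct.ident_t* @loc,
  //       i8* bitcast (i32* @x to i8*), i8* (i8*)* @ctor,
  //       i8* (i8*, i8*)* null, void (i8*)* @dtor)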
2781 llvm::Value *Args[] = { 2782 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2783 Ctor, CopyCtor, Dtor}; 2784 CGF.EmitRuntimeCall( 2785 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2786 } 2787 2788 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2789 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2790 bool PerformInit, CodeGenFunction *CGF) { 2791 if (CGM.getLangOpts().OpenMPUseTLS && 2792 CGM.getContext().getTargetInfo().isTLSSupported()) 2793 return nullptr; 2794 2795 VD = VD->getDefinition(CGM.getContext()); 2796 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2797 QualType ASTTy = VD->getType(); 2798 2799 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2800 const Expr *Init = VD->getAnyInitializer(); 2801 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2802 // Generate function that re-emits the declaration's initializer into the 2803 // threadprivate copy of the variable VD 2804 CodeGenFunction CtorCGF(CGM); 2805 FunctionArgList Args; 2806 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2807 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2808 ImplicitParamDecl::Other); 2809 Args.push_back(&Dst); 2810 2811 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2812 CGM.getContext().VoidPtrTy, Args); 2813 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2814 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2815 llvm::Function *Fn = 2816 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2817 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2818 Args, Loc, Loc); 2819 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2820 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2821 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2822 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2823 Arg = CtorCGF.Builder.CreateElementBitCast( 2824 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2825 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2826 /*IsInitializer=*/true); 2827 ArgVal = CtorCGF.EmitLoadOfScalar( 2828 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2829 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2830 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2831 CtorCGF.FinishFunction(); 2832 Ctor = Fn; 2833 } 2834 if (VD->getType().isDestructedType() != QualType::DK_none) { 2835 // Generate function that emits destructor call for the threadprivate copy 2836 // of the variable VD 2837 CodeGenFunction DtorCGF(CGM); 2838 FunctionArgList Args; 2839 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2840 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2841 ImplicitParamDecl::Other); 2842 Args.push_back(&Dst); 2843 2844 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2845 CGM.getContext().VoidTy, Args); 2846 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2847 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2848 llvm::Function *Fn = 2849 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2850 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2851 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2852 Loc, Loc); 2853 // Create a scope with an artificial location for the body of this function. 
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: this parameter is reserved by the runtime, which
    // currently asserts that it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
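  // For illustration (hypothetical values): a declare-target variable "gbl"
  // declared at line 42 produces a prefix of the form
  //   __omp_offloading_<device-id>_<file-id>_gbl_l42
  // with both IDs printed in hex by the code below.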
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the device copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the device copy
      // of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
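  // (Illustrative shape of the IR built by this helper when the condition
  //  does not fold:
  //    br i1 %cond, label %omp_if.then, label %omp_if.else
  //  omp_if.then:  <ThenGen>; br label %omp_if.end
  //  omp_if.else:  <ElseGen>; br label %omp_if.end
  //  omp_if.end:   ...)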
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
// region, get the thread ID by calling __kmpc_global_thread_num(ident_t *loc),
// stash it in a temporary, and return the address of that temporary.
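// For example (illustrative IR for the serial case):
//   %tid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
//   %.threadid_temp. = alloca i32, align 4
//   store i32 %tid, i32* %.threadid_temp., align 4
// and the address of the temporary is what gets returned.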
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
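/// In the conditional form, the emitted control flow is roughly (sketch):
///   if (EnterCallee(EnterArgs)) {   // e.g. __kmpc_master(&loc, gtid)
///     <region body>
///     ExitCallee(ExitArgs);         // e.g. __kmpc_end_master(&loc, gtid)
///   }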
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (if-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the remaining blocks and branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
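  // In effect (sketch): load the Index-th void* slot and reinterpret it as a
  // pointer to the variable's converted type, i.e. roughly
  //   VarTy *P = (VarTy *)((void **)Array)[Index];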
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
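    // Hedged sketch of that helper's shape for "#pragma omp single
    // copyprivate(a, b)" (names and types illustrative):
    //   void .omp.copyprivate.copy_func(void *dst, void *src) {
    //     *(TyA *)((void **)dst)[0] = *(TyA *)((void **)src)[0]; // a
    //     *(TyB *)((void **)dst)[1] = *(TyB *)((void **)src)[1]; // b
    //   }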
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
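    // E.g. (illustrative): "#pragma omp for ordered(2)" is a doacross loop,
    // so it is lowered as if schedule(static, 1) had been written.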
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
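  // For example (illustrative): with -fopenmp-version=50, a plain
  // "schedule(dynamic)" loop gets the nonmonotonic modifier added by the
  // check below, while "schedule(static)" keeps the monotonic default.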
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //     kmp_int[32|64] lower, kmp_int[32|64] upper,
  //     kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //     kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //     kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //     kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //     kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //     kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized;
  // it only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
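  // (The string created below becomes the "name" field of the
  // __tgt_offload_entry record assembled further down; see
  // getTgtOffloadEntryQTy() for the struct layout.)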
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create a function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
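        // E.g. (illustrative): a target region in function "foo" at line 12
        // could produce the operand
        //   !{i32 0, i32 <device-id>, i32 <file-id>, !"foo", i32 12, i32 0}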
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create a function that emits metadata for each device global variable
  // entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
4254
4255 if (!CGM.getLangOpts().OpenMPIsDevice)
4256 return;
4257
4258 if (CGM.getLangOpts().OMPHostIRFile.empty())
4259 return;
4260
4261 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4262 if (auto EC = Buf.getError()) {
4263 CGM.getDiags().Report(diag::err_cannot_open_file)
4264 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4265 return;
4266 }
4267
4268 llvm::LLVMContext C;
4269 auto ME = expectedToErrorOrAndEmitErrors(
4270 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4271
4272 if (auto EC = ME.getError()) {
4273 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4274 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4275 CGM.getDiags().Report(DiagID)
4276 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4277 return;
4278 }
4279
4280 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4281 if (!MD)
4282 return;
4283
4284 for (llvm::MDNode *MN : MD->operands()) {
4285 auto &&GetMDInt = [MN](unsigned Idx) {
4286 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4287 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4288 };
4289
4290 auto &&GetMDString = [MN](unsigned Idx) {
4291 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4292 return V->getString();
4293 };
4294
4295 switch (GetMDInt(0)) {
4296 default:
4297 llvm_unreachable("Unexpected metadata!");
4298 break;
4299 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300 OffloadingEntryInfoTargetRegion:
4301 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4302 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4303 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4304 /*Order=*/GetMDInt(5));
4305 break;
4306 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4307 OffloadingEntryInfoDeviceGlobalVar:
4308 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4309 /*MangledName=*/GetMDString(1),
4310 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4311 /*Flags=*/GetMDInt(2)),
4312 /*Order=*/GetMDInt(3));
4313 break;
4314 }
4315 }
4316 }
4317
4318 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4319 if (!KmpRoutineEntryPtrTy) {
4320 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4321 ASTContext &C = CGM.getContext();
4322 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4323 FunctionProtoType::ExtProtoInfo EPI;
4324 KmpRoutineEntryPtrQTy = C.getPointerType(
4325 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4327 }
4328 }
4329
4330 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4331 // Make sure the type of the entry is already created. This is the type we
4332 // have to create:
4333 // struct __tgt_offload_entry {
4334 // void *addr; // Pointer to the offload entry info.
4335 // // (function or global)
4336 // char *name; // Name of the function or global.
4337 // size_t size; // Size of the entry info (0 if it is a function).
4338 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4339 // int32_t reserved; // Reserved, to be used by the runtime library.
4340 // }; 4341 if (TgtOffloadEntryQTy.isNull()) { 4342 ASTContext &C = CGM.getContext(); 4343 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4344 RD->startDefinition(); 4345 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4346 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4347 addFieldToRecordDecl(C, RD, C.getSizeType()); 4348 addFieldToRecordDecl( 4349 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4350 addFieldToRecordDecl( 4351 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4352 RD->completeDefinition(); 4353 RD->addAttr(PackedAttr::CreateImplicit(C)); 4354 TgtOffloadEntryQTy = C.getRecordType(RD); 4355 } 4356 return TgtOffloadEntryQTy; 4357 } 4358 4359 namespace { 4360 struct PrivateHelpersTy { 4361 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4362 const VarDecl *PrivateElemInit) 4363 : Original(Original), PrivateCopy(PrivateCopy), 4364 PrivateElemInit(PrivateElemInit) {} 4365 const VarDecl *Original; 4366 const VarDecl *PrivateCopy; 4367 const VarDecl *PrivateElemInit; 4368 }; 4369 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4370 } // anonymous namespace 4371 4372 static RecordDecl * 4373 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4374 if (!Privates.empty()) { 4375 ASTContext &C = CGM.getContext(); 4376 // Build struct .kmp_privates_t. { 4377 // /* private vars */ 4378 // }; 4379 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4380 RD->startDefinition(); 4381 for (const auto &Pair : Privates) { 4382 const VarDecl *VD = Pair.second.Original; 4383 QualType Type = VD->getType().getNonReferenceType(); 4384 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4385 if (VD->hasAttrs()) { 4386 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4387 E(VD->getAttrs().end()); 4388 I != E; ++I) 4389 FD->addAttr(*I); 4390 } 4391 } 4392 RD->completeDefinition(); 4393 return RD; 4394 } 4395 return nullptr; 4396 } 4397 4398 static RecordDecl * 4399 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4400 QualType KmpInt32Ty, 4401 QualType KmpRoutineEntryPointerQTy) { 4402 ASTContext &C = CGM.getContext(); 4403 // Build struct kmp_task_t { 4404 // void * shareds; 4405 // kmp_routine_entry_t routine; 4406 // kmp_int32 part_id; 4407 // kmp_cmplrdata_t data1; 4408 // kmp_cmplrdata_t data2; 4409 // For taskloops additional fields: 4410 // kmp_uint64 lb; 4411 // kmp_uint64 ub; 4412 // kmp_int64 st; 4413 // kmp_int32 liter; 4414 // void * reductions; 4415 // }; 4416 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4417 UD->startDefinition(); 4418 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4419 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4420 UD->completeDefinition(); 4421 QualType KmpCmplrdataTy = C.getRecordType(UD); 4422 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4423 RD->startDefinition(); 4424 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4425 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4426 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4427 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4428 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4429 if (isOpenMPTaskLoopDirective(Kind)) { 4430 QualType KmpUInt64Ty = 4431 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4432 QualType KmpInt64Ty = 4433 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4434 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4435 addFieldToRecordDecl(C, RD, 
KmpUInt64Ty); 4436 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4437 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4438 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4439 } 4440 RD->completeDefinition(); 4441 return RD; 4442 } 4443 4444 static RecordDecl * 4445 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4446 ArrayRef<PrivateDataTy> Privates) { 4447 ASTContext &C = CGM.getContext(); 4448 // Build struct kmp_task_t_with_privates { 4449 // kmp_task_t task_data; 4450 // .kmp_privates_t. privates; 4451 // }; 4452 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4453 RD->startDefinition(); 4454 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4455 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4456 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4457 RD->completeDefinition(); 4458 return RD; 4459 } 4460 4461 /// Emit a proxy function which accepts kmp_task_t as the second 4462 /// argument. 4463 /// \code 4464 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4465 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4466 /// For taskloops: 4467 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4468 /// tt->reductions, tt->shareds); 4469 /// return 0; 4470 /// } 4471 /// \endcode 4472 static llvm::Function * 4473 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4474 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4475 QualType KmpTaskTWithPrivatesPtrQTy, 4476 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4477 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4478 llvm::Value *TaskPrivatesMap) { 4479 ASTContext &C = CGM.getContext(); 4480 FunctionArgList Args; 4481 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4482 ImplicitParamDecl::Other); 4483 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4484 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4485 ImplicitParamDecl::Other); 4486 Args.push_back(&GtidArg); 4487 Args.push_back(&TaskTypeArg); 4488 const auto &TaskEntryFnInfo = 4489 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4490 llvm::FunctionType *TaskEntryTy = 4491 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4492 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4493 auto *TaskEntry = llvm::Function::Create( 4494 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4495 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4496 TaskEntry->setDoesNotRecurse(); 4497 CodeGenFunction CGF(CGM); 4498 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4499 Loc, Loc); 4500 4501 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4502 // tt, 4503 // For taskloops: 4504 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4505 // tt->task_data.shareds); 4506 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4507 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4508 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4509 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4510 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4511 const auto *KmpTaskTWithPrivatesQTyRD = 4512 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4513 LValue Base = 4514 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4515 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4516 auto 
PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4517 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4518 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4519 4520 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4521 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4522 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4523 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4524 CGF.ConvertTypeForMem(SharedsPtrTy)); 4525 4526 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4527 llvm::Value *PrivatesParam; 4528 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4529 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4530 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4531 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4532 } else { 4533 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4534 } 4535 4536 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4537 TaskPrivatesMap, 4538 CGF.Builder 4539 .CreatePointerBitCastOrAddrSpaceCast( 4540 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4541 .getPointer()}; 4542 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4543 std::end(CommonArgs)); 4544 if (isOpenMPTaskLoopDirective(Kind)) { 4545 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4546 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4547 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4548 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4549 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4550 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4551 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4552 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4553 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4554 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4555 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4556 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4557 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4558 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4559 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4560 CallArgs.push_back(LBParam); 4561 CallArgs.push_back(UBParam); 4562 CallArgs.push_back(StParam); 4563 CallArgs.push_back(LIParam); 4564 CallArgs.push_back(RParam); 4565 } 4566 CallArgs.push_back(SharedsParam); 4567 4568 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4569 CallArgs); 4570 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4571 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4572 CGF.FinishFunction(); 4573 return TaskEntry; 4574 } 4575 4576 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4577 SourceLocation Loc, 4578 QualType KmpInt32Ty, 4579 QualType KmpTaskTWithPrivatesPtrQTy, 4580 QualType KmpTaskTWithPrivatesQTy) { 4581 ASTContext &C = CGM.getContext(); 4582 FunctionArgList Args; 4583 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4584 ImplicitParamDecl::Other); 4585 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4586 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4587 ImplicitParamDecl::Other); 4588 Args.push_back(&GtidArg); 4589 Args.push_back(&TaskTypeArg); 4590 const auto &DestructorFnInfo = 4591 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 
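// Sketch of the helper being built below, in the style of the \code examples
// used for the other task helpers in this file (field names illustrative):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     // Run the destructor of each non-trivially-destructible private.
//     tt->privates.<priv>.~T();
//     ...
//   }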
4592 llvm::FunctionType *DestructorFnTy = 4593 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4594 std::string Name = 4595 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4596 auto *DestructorFn = 4597 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4598 Name, &CGM.getModule()); 4599 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4600 DestructorFnInfo); 4601 DestructorFn->setDoesNotRecurse(); 4602 CodeGenFunction CGF(CGM); 4603 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4604 Args, Loc, Loc); 4605 4606 LValue Base = CGF.EmitLoadOfPointerLValue( 4607 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4608 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4609 const auto *KmpTaskTWithPrivatesQTyRD = 4610 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4611 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4612 Base = CGF.EmitLValueForField(Base, *FI); 4613 for (const auto *Field : 4614 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4615 if (QualType::DestructionKind DtorKind = 4616 Field->getType().isDestructedType()) { 4617 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4618 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4619 } 4620 } 4621 CGF.FinishFunction(); 4622 return DestructorFn; 4623 } 4624 4625 /// Emit a privates mapping function for correct handling of private and 4626 /// firstprivate variables. 4627 /// \code 4628 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 4629 /// **noalias priv1,..., <tyn> **noalias privn) { 4630 /// *priv1 = &.privates.priv1; 4631 /// ...; 4632 /// *privn = &.privates.privn; 4633 /// } 4634 /// \endcode 4635 static llvm::Value * 4636 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4637 ArrayRef<const Expr *> PrivateVars, 4638 ArrayRef<const Expr *> FirstprivateVars, 4639 ArrayRef<const Expr *> LastprivateVars, 4640 QualType PrivatesQTy, 4641 ArrayRef<PrivateDataTy> Privates) { 4642 ASTContext &C = CGM.getContext(); 4643 FunctionArgList Args; 4644 ImplicitParamDecl TaskPrivatesArg( 4645 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4646 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4647 ImplicitParamDecl::Other); 4648 Args.push_back(&TaskPrivatesArg); 4649 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4650 unsigned Counter = 1; 4651 for (const Expr *E : PrivateVars) { 4652 Args.push_back(ImplicitParamDecl::Create( 4653 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4654 C.getPointerType(C.getPointerType(E->getType())) 4655 .withConst() 4656 .withRestrict(), 4657 ImplicitParamDecl::Other)); 4658 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4659 PrivateVarsPos[VD] = Counter; 4660 ++Counter; 4661 } 4662 for (const Expr *E : FirstprivateVars) { 4663 Args.push_back(ImplicitParamDecl::Create( 4664 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4665 C.getPointerType(C.getPointerType(E->getType())) 4666 .withConst() 4667 .withRestrict(), 4668 ImplicitParamDecl::Other)); 4669 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4670 PrivateVarsPos[VD] = Counter; 4671 ++Counter; 4672 } 4673 for (const Expr *E : LastprivateVars) { 4674 Args.push_back(ImplicitParamDecl::Create( 4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4676 C.getPointerType(C.getPointerType(E->getType())) 4677 .withConst() 4678 .withRestrict(), 4679 ImplicitParamDecl::Other)); 4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4681 
PrivateVarsPos[VD] = Counter; 4682 ++Counter; 4683 } 4684 const auto &TaskPrivatesMapFnInfo = 4685 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4686 llvm::FunctionType *TaskPrivatesMapTy = 4687 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4688 std::string Name = 4689 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4690 auto *TaskPrivatesMap = llvm::Function::Create( 4691 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4692 &CGM.getModule()); 4693 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4694 TaskPrivatesMapFnInfo); 4695 if (CGM.getLangOpts().Optimize) { 4696 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4697 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4698 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4699 } 4700 CodeGenFunction CGF(CGM); 4701 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4702 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4703 4704 // *privi = &.privates.privi; 4705 LValue Base = CGF.EmitLoadOfPointerLValue( 4706 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4707 TaskPrivatesArg.getType()->castAs<PointerType>()); 4708 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4709 Counter = 0; 4710 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4711 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4712 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4713 LValue RefLVal = 4714 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4715 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4716 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4717 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4718 ++Counter; 4719 } 4720 CGF.FinishFunction(); 4721 return TaskPrivatesMap; 4722 } 4723 4724 /// Emit initialization for private variables in task-based directives. 4725 static void emitPrivatesInit(CodeGenFunction &CGF, 4726 const OMPExecutableDirective &D, 4727 Address KmpTaskSharedsPtr, LValue TDBase, 4728 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4729 QualType SharedsTy, QualType SharedsPtrTy, 4730 const OMPTaskDataTy &Data, 4731 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4732 ASTContext &C = CGF.getContext(); 4733 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4734 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4735 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4736 ? OMPD_taskloop 4737 : OMPD_task; 4738 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4739 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4740 LValue SrcBase; 4741 bool IsTargetTask = 4742 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4743 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4744 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4745 // PointersArray and SizesArray. The original variables for these arrays are 4746 // not captured and we get their addresses explicitly. 
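// Hence the guard below: a regular task needs SrcBase whenever there are
// firstprivates to copy, while a target task sets it up only if a shareds
// pointer actually exists.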
4747 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4748 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4749 SrcBase = CGF.MakeAddrLValue( 4750 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4751 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4752 SharedsTy); 4753 } 4754 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4755 for (const PrivateDataTy &Pair : Privates) { 4756 const VarDecl *VD = Pair.second.PrivateCopy; 4757 const Expr *Init = VD->getAnyInitializer(); 4758 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4759 !CGF.isTrivialInitializer(Init)))) { 4760 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4761 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4762 const VarDecl *OriginalVD = Pair.second.Original; 4763 // Check if the variable is the target-based BasePointersArray, 4764 // PointersArray or SizesArray. 4765 LValue SharedRefLValue; 4766 QualType Type = PrivateLValue.getType(); 4767 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4768 if (IsTargetTask && !SharedField) { 4769 assert(isa<ImplicitParamDecl>(OriginalVD) && 4770 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4771 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4772 ->getNumParams() == 0 && 4773 isa<TranslationUnitDecl>( 4774 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4775 ->getDeclContext()) && 4776 "Expected artificial target data variable."); 4777 SharedRefLValue = 4778 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4779 } else { 4780 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4781 SharedRefLValue = CGF.MakeAddrLValue( 4782 Address(SharedRefLValue.getPointer(CGF), 4783 C.getDeclAlign(OriginalVD)), 4784 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4785 SharedRefLValue.getTBAAInfo()); 4786 } 4787 if (Type->isArrayType()) { 4788 // Initialize firstprivate array. 4789 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4790 // Perform simple memcpy. 4791 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4792 } else { 4793 // Initialize firstprivate array using element-by-element 4794 // initialization. 4795 CGF.EmitOMPAggregateAssign( 4796 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4797 Type, 4798 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4799 Address SrcElement) { 4800 // Clean up any temporaries needed by the initialization. 4801 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4802 InitScope.addPrivate( 4803 Elem, [SrcElement]() -> Address { return SrcElement; }); 4804 (void)InitScope.Privatize(); 4805 // Emit initialization for single element. 4806 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4807 CGF, &CapturesInfo); 4808 CGF.EmitAnyExprToMem(Init, DestElement, 4809 Init->getType().getQualifiers(), 4810 /*IsInitializer=*/false); 4811 }); 4812 } 4813 } else { 4814 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4815 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4816 return SharedRefLValue.getAddress(CGF); 4817 }); 4818 (void)InitScope.Privatize(); 4819 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4820 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4821 /*capturedByInit=*/false); 4822 } 4823 } else { 4824 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4825 } 4826 } 4827 ++FI; 4828 } 4829 } 4830 4831 /// Check if duplication function is required for taskloops. 
4832 static bool checkInitIsRequired(CodeGenFunction &CGF, 4833 ArrayRef<PrivateDataTy> Privates) { 4834 bool InitRequired = false; 4835 for (const PrivateDataTy &Pair : Privates) { 4836 const VarDecl *VD = Pair.second.PrivateCopy; 4837 const Expr *Init = VD->getAnyInitializer(); 4838 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4839 !CGF.isTrivialInitializer(Init)); 4840 if (InitRequired) 4841 break; 4842 } 4843 return InitRequired; 4844 } 4845 4846 4847 /// Emit task_dup function (for initialization of 4848 /// private/firstprivate/lastprivate vars and last_iter flag) 4849 /// \code 4850 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4851 /// lastpriv) { 4852 /// // setup lastprivate flag 4853 /// task_dst->last = lastpriv; 4854 /// // could be constructor calls here... 4855 /// } 4856 /// \endcode 4857 static llvm::Value * 4858 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4859 const OMPExecutableDirective &D, 4860 QualType KmpTaskTWithPrivatesPtrQTy, 4861 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4862 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4863 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4864 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4865 ASTContext &C = CGM.getContext(); 4866 FunctionArgList Args; 4867 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4868 KmpTaskTWithPrivatesPtrQTy, 4869 ImplicitParamDecl::Other); 4870 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4871 KmpTaskTWithPrivatesPtrQTy, 4872 ImplicitParamDecl::Other); 4873 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4874 ImplicitParamDecl::Other); 4875 Args.push_back(&DstArg); 4876 Args.push_back(&SrcArg); 4877 Args.push_back(&LastprivArg); 4878 const auto &TaskDupFnInfo = 4879 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4880 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4881 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4882 auto *TaskDup = llvm::Function::Create( 4883 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4884 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4885 TaskDup->setDoesNotRecurse(); 4886 CodeGenFunction CGF(CGM); 4887 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4888 Loc); 4889 4890 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4891 CGF.GetAddrOfLocalVar(&DstArg), 4892 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4893 // task_dst->liter = lastpriv; 4894 if (WithLastIter) { 4895 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4896 LValue Base = CGF.EmitLValueForField( 4897 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4898 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4899 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4900 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4901 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4902 } 4903 4904 // Emit initial values for private copies (if any). 
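// Note that the shareds pointer is read from the *source* task (SrcArg), so
// the duplicate's firstprivate copies are initialized from the original
// task's data.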
4905 assert(!Privates.empty()); 4906 Address KmpTaskSharedsPtr = Address::invalid(); 4907 if (!Data.FirstprivateVars.empty()) { 4908 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4909 CGF.GetAddrOfLocalVar(&SrcArg), 4910 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4911 LValue Base = CGF.EmitLValueForField( 4912 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4913 KmpTaskSharedsPtr = Address( 4914 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4915 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4916 KmpTaskTShareds)), 4917 Loc), 4918 CGF.getNaturalTypeAlignment(SharedsTy)); 4919 } 4920 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4921 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4922 CGF.FinishFunction(); 4923 return TaskDup; 4924 } 4925 4926 /// Checks if destructor function is required to be generated. 4927 /// \return true if cleanups are required, false otherwise. 4928 static bool 4929 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4930 bool NeedsCleanup = false; 4931 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4932 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4933 for (const FieldDecl *FD : PrivateRD->fields()) { 4934 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4935 if (NeedsCleanup) 4936 break; 4937 } 4938 return NeedsCleanup; 4939 } 4940 4941 CGOpenMPRuntime::TaskResultTy 4942 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4943 const OMPExecutableDirective &D, 4944 llvm::Function *TaskFunction, QualType SharedsTy, 4945 Address Shareds, const OMPTaskDataTy &Data) { 4946 ASTContext &C = CGM.getContext(); 4947 llvm::SmallVector<PrivateDataTy, 4> Privates; 4948 // Aggregate privates and sort them by the alignment. 4949 auto I = Data.PrivateCopies.begin(); 4950 for (const Expr *E : Data.PrivateVars) { 4951 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4952 Privates.emplace_back( 4953 C.getDeclAlign(VD), 4954 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4955 /*PrivateElemInit=*/nullptr)); 4956 ++I; 4957 } 4958 I = Data.FirstprivateCopies.begin(); 4959 auto IElemInitRef = Data.FirstprivateInits.begin(); 4960 for (const Expr *E : Data.FirstprivateVars) { 4961 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4962 Privates.emplace_back( 4963 C.getDeclAlign(VD), 4964 PrivateHelpersTy( 4965 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4966 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4967 ++I; 4968 ++IElemInitRef; 4969 } 4970 I = Data.LastprivateCopies.begin(); 4971 for (const Expr *E : Data.LastprivateVars) { 4972 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4973 Privates.emplace_back( 4974 C.getDeclAlign(VD), 4975 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4976 /*PrivateElemInit=*/nullptr)); 4977 ++I; 4978 } 4979 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4980 return L.first > R.first; 4981 }); 4982 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4983 // Build type kmp_routine_entry_t (if not built yet). 4984 emitKmpRoutineEntryT(KmpInt32Ty); 4985 // Build type kmp_task_t (if not built yet). 
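// Two cached record types are kept because the taskloop flavor of kmp_task_t
// carries the extra trailing fields (lb, ub, st, liter, reductions) added by
// createKmpTaskTRecordDecl above.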
4986 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4987 if (SavedKmpTaskloopTQTy.isNull()) { 4988 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4989 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4990 } 4991 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4992 } else { 4993 assert((D.getDirectiveKind() == OMPD_task || 4994 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4995 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4996 "Expected taskloop, task or target directive"); 4997 if (SavedKmpTaskTQTy.isNull()) { 4998 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4999 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5000 } 5001 KmpTaskTQTy = SavedKmpTaskTQTy; 5002 } 5003 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5004 // Build particular struct kmp_task_t for the given task. 5005 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5006 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5007 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5008 QualType KmpTaskTWithPrivatesPtrQTy = 5009 C.getPointerType(KmpTaskTWithPrivatesQTy); 5010 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5011 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5012 KmpTaskTWithPrivatesTy->getPointerTo(); 5013 llvm::Value *KmpTaskTWithPrivatesTySize = 5014 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5015 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5016 5017 // Emit initial values for private copies (if any). 5018 llvm::Value *TaskPrivatesMap = nullptr; 5019 llvm::Type *TaskPrivatesMapTy = 5020 std::next(TaskFunction->arg_begin(), 3)->getType(); 5021 if (!Privates.empty()) { 5022 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5023 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5024 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5025 FI->getType(), Privates); 5026 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5027 TaskPrivatesMap, TaskPrivatesMapTy); 5028 } else { 5029 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5030 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5031 } 5032 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5033 // kmp_task_t *tt); 5034 llvm::Function *TaskEntry = emitProxyTaskFunction( 5035 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5036 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5037 TaskPrivatesMap); 5038 5039 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5040 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5041 // kmp_routine_entry_t *task_entry); 5042 // Task flags. Format is taken from 5043 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5044 // description of kmp_tasking_flags struct. 5045 enum { 5046 TiedFlag = 0x1, 5047 FinalFlag = 0x2, 5048 DestructorsFlag = 0x8, 5049 PriorityFlag = 0x20 5050 }; 5051 unsigned Flags = Data.Tied ? TiedFlag : 0; 5052 bool NeedsCleanup = false; 5053 if (!Privates.empty()) { 5054 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5055 if (NeedsCleanup) 5056 Flags = Flags | DestructorsFlag; 5057 } 5058 if (Data.Priority.getInt()) 5059 Flags = Flags | PriorityFlag; 5060 llvm::Value *TaskFlags = 5061 Data.Final.getPointer() 5062 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 5063 CGF.Builder.getInt32(FinalFlag), 5064 CGF.Builder.getInt32(/*C=*/0)) 5065 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5066 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5067 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5068 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5069 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5070 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5071 TaskEntry, KmpRoutineEntryPtrTy)}; 5072 llvm::Value *NewTask; 5073 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5074 // Check if we have any device clause associated with the directive. 5075 const Expr *Device = nullptr; 5076 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5077 Device = C->getDevice(); 5078 // Emit device ID if any otherwise use default value. 5079 llvm::Value *DeviceID; 5080 if (Device) 5081 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5082 CGF.Int64Ty, /*isSigned=*/true); 5083 else 5084 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5085 AllocArgs.push_back(DeviceID); 5086 NewTask = CGF.EmitRuntimeCall( 5087 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5088 } else { 5089 NewTask = CGF.EmitRuntimeCall( 5090 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5091 } 5092 llvm::Value *NewTaskNewTaskTTy = 5093 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5094 NewTask, KmpTaskTWithPrivatesPtrTy); 5095 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5096 KmpTaskTWithPrivatesQTy); 5097 LValue TDBase = 5098 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5099 // Fill the data in the resulting kmp_task_t record. 5100 // Copy shareds if there are any. 5101 Address KmpTaskSharedsPtr = Address::invalid(); 5102 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5103 KmpTaskSharedsPtr = 5104 Address(CGF.EmitLoadOfScalar( 5105 CGF.EmitLValueForField( 5106 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5107 KmpTaskTShareds)), 5108 Loc), 5109 CGF.getNaturalTypeAlignment(SharedsTy)); 5110 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5111 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5112 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5113 } 5114 // Emit initial values for private copies (if any). 5115 TaskResultTy Result; 5116 if (!Privates.empty()) { 5117 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5118 SharedsTy, SharedsPtrTy, Data, Privates, 5119 /*ForDup=*/false); 5120 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5121 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5122 Result.TaskDupFn = emitTaskDupFunction( 5123 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5124 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5125 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5126 } 5127 } 5128 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5129 enum { Priority = 0, Destructors = 1 }; 5130 // Provide pointer to function with destructors for privates. 
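// kmp_cmplrdata_t is the two-member union built in createKmpTaskTRecordDecl
// (a kmp_int32 and a kmp_routine_entry_t); data1 receives the destructor
// thunk and data2 the priority, selected via the field indices above.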
5131 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5132 const RecordDecl *KmpCmplrdataUD = 5133 (*FI)->getType()->getAsUnionType()->getDecl(); 5134 if (NeedsCleanup) { 5135 llvm::Value *DestructorFn = emitDestructorsFunction( 5136 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5137 KmpTaskTWithPrivatesQTy); 5138 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5139 LValue DestructorsLV = CGF.EmitLValueForField( 5140 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5141 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5142 DestructorFn, KmpRoutineEntryPtrTy), 5143 DestructorsLV); 5144 } 5145 // Set priority. 5146 if (Data.Priority.getInt()) { 5147 LValue Data2LV = CGF.EmitLValueForField( 5148 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5149 LValue PriorityLV = CGF.EmitLValueForField( 5150 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5151 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5152 } 5153 Result.NewTask = NewTask; 5154 Result.TaskEntry = TaskEntry; 5155 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5156 Result.TDBase = TDBase; 5157 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5158 return Result; 5159 } 5160 5161 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5162 const OMPExecutableDirective &D, 5163 llvm::Function *TaskFunction, 5164 QualType SharedsTy, Address Shareds, 5165 const Expr *IfCond, 5166 const OMPTaskDataTy &Data) { 5167 if (!CGF.HaveInsertPoint()) 5168 return; 5169 5170 TaskResultTy Result = 5171 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5172 llvm::Value *NewTask = Result.NewTask; 5173 llvm::Function *TaskEntry = Result.TaskEntry; 5174 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5175 LValue TDBase = Result.TDBase; 5176 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5177 ASTContext &C = CGM.getContext(); 5178 // Process list of dependences. 5179 Address DependenciesArray = Address::invalid(); 5180 unsigned NumDependencies = Data.Dependences.size(); 5181 if (NumDependencies) { 5182 // Dependence kind for RTL. 
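// A sketch of what the loop below materializes for, e.g., 'depend(inout: x)':
//   deps[i].base_addr = (intptr_t)&x;
//   deps[i].len       = sizeof(x); // or the section length for array sections
//   deps[i].flags     = 0x3;       // DepInOut
// The flag values are intended to mirror the runtime's kmp_depend_info.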
5183 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5184 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5185 RecordDecl *KmpDependInfoRD; 5186 QualType FlagsTy = 5187 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5188 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5189 if (KmpDependInfoTy.isNull()) { 5190 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5191 KmpDependInfoRD->startDefinition(); 5192 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5193 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5194 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5195 KmpDependInfoRD->completeDefinition(); 5196 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5197 } else { 5198 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5199 } 5200 // Define type kmp_depend_info[<Dependences.size()>]; 5201 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5202 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5203 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5204 // kmp_depend_info[<Dependences.size()>] deps; 5205 DependenciesArray = 5206 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5207 for (unsigned I = 0; I < NumDependencies; ++I) { 5208 const Expr *E = Data.Dependences[I].second; 5209 LValue Addr = CGF.EmitLValue(E); 5210 llvm::Value *Size; 5211 QualType Ty = E->getType(); 5212 if (const auto *ASE = 5213 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5214 LValue UpAddrLVal = 5215 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5216 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 5217 UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 5218 llvm::Value *LowIntPtr = 5219 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); 5220 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5221 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5222 } else { 5223 Size = CGF.getTypeSize(Ty); 5224 } 5225 LValue Base = CGF.MakeAddrLValue( 5226 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5227 KmpDependInfoTy); 5228 // deps[i].base_addr = &<Dependences[i].second>; 5229 LValue BaseAddrLVal = CGF.EmitLValueForField( 5230 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5231 CGF.EmitStoreOfScalar( 5232 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), 5233 BaseAddrLVal); 5234 // deps[i].len = sizeof(<Dependences[i].second>); 5235 LValue LenLVal = CGF.EmitLValueForField( 5236 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5237 CGF.EmitStoreOfScalar(Size, LenLVal); 5238 // deps[i].flags = <Dependences[i].first>; 5239 RTLDependenceKindTy DepKind; 5240 switch (Data.Dependences[I].first) { 5241 case OMPC_DEPEND_in: 5242 DepKind = DepIn; 5243 break; 5244 // Out and InOut dependencies must use the same code. 
5245 case OMPC_DEPEND_out:
5246 case OMPC_DEPEND_inout:
5247 DepKind = DepInOut;
5248 break;
5249 case OMPC_DEPEND_mutexinoutset:
5250 DepKind = DepMutexInOutSet;
5251 break;
5252 case OMPC_DEPEND_source:
5253 case OMPC_DEPEND_sink:
5254 case OMPC_DEPEND_unknown:
5255 llvm_unreachable("Unknown task dependence type");
5256 }
5257 LValue FlagsLVal = CGF.EmitLValueForField(
5258 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5259 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5260 FlagsLVal);
5261 }
5262 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5263 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5264 }
5265
5266 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5267 // libcall.
5268 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5269 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5270 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
5271 // dependence list is not empty.
5272 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5273 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5274 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5275 llvm::Value *DepTaskArgs[7];
5276 if (NumDependencies) {
5277 DepTaskArgs[0] = UpLoc;
5278 DepTaskArgs[1] = ThreadID;
5279 DepTaskArgs[2] = NewTask;
5280 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5281 DepTaskArgs[4] = DependenciesArray.getPointer();
5282 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5283 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5284 }
5285 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5286 &TaskArgs,
5287 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5288 if (!Data.Tied) {
5289 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5290 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5291 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5292 }
5293 if (NumDependencies) {
5294 CGF.EmitRuntimeCall(
5295 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5296 } else {
5297 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5298 TaskArgs);
5299 }
5300 // Check if parent region is untied and build return for untied task.
5301 if (auto *Region =
5302 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5303 Region->emitUntiedSwitch(CGF);
5304 };
5305
5306 llvm::Value *DepWaitTaskArgs[6];
5307 if (NumDependencies) {
5308 DepWaitTaskArgs[0] = UpLoc;
5309 DepWaitTaskArgs[1] = ThreadID;
5310 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5311 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5312 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5313 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5314 }
5315 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5316 NumDependencies, &DepWaitTaskArgs,
5317 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5318 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5319 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5320 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5321 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5322 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5323 // is specified.
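// On the 'if' clause false path the task executes immediately and
// undeferred: wait for outstanding dependences first, then call the proxy
// entry bracketed by __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.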
5324 if (NumDependencies) 5325 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5326 DepWaitTaskArgs); 5327 // Call proxy_task_entry(gtid, new_task); 5328 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5329 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5330 Action.Enter(CGF); 5331 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5332 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5333 OutlinedFnArgs); 5334 }; 5335 5336 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5337 // kmp_task_t *new_task); 5338 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5339 // kmp_task_t *new_task); 5340 RegionCodeGenTy RCG(CodeGen); 5341 CommonActionTy Action( 5342 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5343 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5344 RCG.setAction(Action); 5345 RCG(CGF); 5346 }; 5347 5348 if (IfCond) { 5349 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5350 } else { 5351 RegionCodeGenTy ThenRCG(ThenCodeGen); 5352 ThenRCG(CGF); 5353 } 5354 } 5355 5356 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5357 const OMPLoopDirective &D, 5358 llvm::Function *TaskFunction, 5359 QualType SharedsTy, Address Shareds, 5360 const Expr *IfCond, 5361 const OMPTaskDataTy &Data) { 5362 if (!CGF.HaveInsertPoint()) 5363 return; 5364 TaskResultTy Result = 5365 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5366 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5367 // libcall. 5368 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5369 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5370 // sched, kmp_uint64 grainsize, void *task_dup); 5371 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5372 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5373 llvm::Value *IfVal; 5374 if (IfCond) { 5375 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5376 /*isSigned=*/true); 5377 } else { 5378 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5379 } 5380 5381 LValue LBLVal = CGF.EmitLValueForField( 5382 Result.TDBase, 5383 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5384 const auto *LBVar = 5385 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5386 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5387 LBLVal.getQuals(), 5388 /*IsInitializer=*/true); 5389 LValue UBLVal = CGF.EmitLValueForField( 5390 Result.TDBase, 5391 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5392 const auto *UBVar = 5393 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5394 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5395 UBLVal.getQuals(), 5396 /*IsInitializer=*/true); 5397 LValue StLVal = CGF.EmitLValueForField( 5398 Result.TDBase, 5399 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5400 const auto *StVar = 5401 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5402 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5403 StLVal.getQuals(), 5404 /*IsInitializer=*/true); 5405 // Store reductions address. 
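// When set, Data.Reductions is the taskgroup reduction descriptor returned
// by the task reduction initialization call; the task body is expected to
// look up its per-thread copies through it (in the LLVM OpenMP runtime, via
// __kmpc_task_reduction_get_th_data).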
5406 LValue RedLVal = CGF.EmitLValueForField( 5407 Result.TDBase, 5408 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5409 if (Data.Reductions) { 5410 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5411 } else { 5412 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5413 CGF.getContext().VoidPtrTy); 5414 } 5415 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5416 llvm::Value *TaskArgs[] = { 5417 UpLoc, 5418 ThreadID, 5419 Result.NewTask, 5420 IfVal, 5421 LBLVal.getPointer(CGF), 5422 UBLVal.getPointer(CGF), 5423 CGF.EmitLoadOfScalar(StLVal, Loc), 5424 llvm::ConstantInt::getSigned( 5425 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5426 llvm::ConstantInt::getSigned( 5427 CGF.IntTy, Data.Schedule.getPointer() 5428 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5429 : NoSchedule), 5430 Data.Schedule.getPointer() 5431 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5432 /*isSigned=*/false) 5433 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5434 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5435 Result.TaskDupFn, CGF.VoidPtrTy) 5436 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5437 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5438 } 5439 5440 /// Emit reduction operation for each element of array (required for 5441 /// array sections) LHS op = RHS. 5442 /// \param Type Type of array. 5443 /// \param LHSVar Variable on the left side of the reduction operation 5444 /// (references element of array in original variable). 5445 /// \param RHSVar Variable on the right side of the reduction operation 5446 /// (references element of array in original variable). 5447 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5448 /// RHSVar. 5449 static void EmitOMPAggregateReduction( 5450 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5451 const VarDecl *RHSVar, 5452 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5453 const Expr *, const Expr *)> &RedOpGen, 5454 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5455 const Expr *UpExpr = nullptr) { 5456 // Perform element-by-element initialization. 5457 QualType ElementTy; 5458 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5459 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5460 5461 // Drill down to the base element type on both arrays. 5462 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5463 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5464 5465 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5466 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5467 // Cast from pointer to array type to pointer to single element. 5468 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5469 // The basic structure here is a while-do loop. 5470 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5471 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5472 llvm::Value *IsEmpty = 5473 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5474 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5475 5476 // Enter the loop body, making that address the current address. 
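// Conceptually the emitted loop is (sketch, element type T):
//   T *lhs = LHSBegin, *rhs = RHSBegin;
//   do { <RedOp>(*lhs, *rhs); ++lhs; ++rhs; } while (lhs != LHSEnd);
// with the two pointers modelled by the PHI nodes created below; the
// emptiness check above skips the loop entirely for zero-length sections.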
5477 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5478 CGF.EmitBlock(BodyBB); 5479 5480 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5481 5482 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5483 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5484 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5485 Address RHSElementCurrent = 5486 Address(RHSElementPHI, 5487 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5488 5489 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5490 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5491 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5492 Address LHSElementCurrent = 5493 Address(LHSElementPHI, 5494 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5495 5496 // Emit copy. 5497 CodeGenFunction::OMPPrivateScope Scope(CGF); 5498 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5499 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5500 Scope.Privatize(); 5501 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5502 Scope.ForceCleanup(); 5503 5504 // Shift the address forward by one element. 5505 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5506 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5507 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5508 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5509 // Check whether we've reached the end. 5510 llvm::Value *Done = 5511 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5512 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5513 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5514 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5515 5516 // Done. 5517 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5518 } 5519 5520 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5521 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5522 /// UDR combiner function. 
5523 static void emitReductionCombiner(CodeGenFunction &CGF, 5524 const Expr *ReductionOp) { 5525 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5526 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5527 if (const auto *DRE = 5528 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5529 if (const auto *DRD = 5530 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5531 std::pair<llvm::Function *, llvm::Function *> Reduction = 5532 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5533 RValue Func = RValue::get(Reduction.first); 5534 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5535 CGF.EmitIgnoredExpr(ReductionOp); 5536 return; 5537 } 5538 CGF.EmitIgnoredExpr(ReductionOp); 5539 } 5540 5541 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5542 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5543 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5544 ArrayRef<const Expr *> ReductionOps) { 5545 ASTContext &C = CGM.getContext(); 5546 5547 // void reduction_func(void *LHSArg, void *RHSArg); 5548 FunctionArgList Args; 5549 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5550 ImplicitParamDecl::Other); 5551 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5552 ImplicitParamDecl::Other); 5553 Args.push_back(&LHSArg); 5554 Args.push_back(&RHSArg); 5555 const auto &CGFI = 5556 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5557 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5558 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5559 llvm::GlobalValue::InternalLinkage, Name, 5560 &CGM.getModule()); 5561 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5562 Fn->setDoesNotRecurse(); 5563 CodeGenFunction CGF(CGM); 5564 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5565 5566 // Dst = (void*[n])(LHSArg); 5567 // Src = (void*[n])(RHSArg); 5568 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5569 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5570 ArgsType), CGF.getPointerAlign()); 5571 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5572 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5573 ArgsType), CGF.getPointerAlign()); 5574 5575 // ... 5576 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5577 // ... 5578 CodeGenFunction::OMPPrivateScope Scope(CGF); 5579 auto IPriv = Privates.begin(); 5580 unsigned Idx = 0; 5581 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5582 const auto *RHSVar = 5583 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5584 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5585 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5586 }); 5587 const auto *LHSVar = 5588 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5589 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5590 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5591 }); 5592 QualType PrivTy = (*IPriv)->getType(); 5593 if (PrivTy->isVariablyModifiedType()) { 5594 // Get array size and emit VLA type. 
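// For a variably-modified type the reduction list carries one extra slot
// holding the element count as a pointer-sized integer (stored with
// IntToPtr by emitReduction); it is loaded back here and bound to the VLA
// size expression so that EmitVariablyModifiedType can recompute the type
// inside this helper.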
5595 ++Idx; 5596 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5597 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5598 const VariableArrayType *VLA = 5599 CGF.getContext().getAsVariableArrayType(PrivTy); 5600 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5601 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5602 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5603 CGF.EmitVariablyModifiedType(PrivTy); 5604 } 5605 } 5606 Scope.Privatize(); 5607 IPriv = Privates.begin(); 5608 auto ILHS = LHSExprs.begin(); 5609 auto IRHS = RHSExprs.begin(); 5610 for (const Expr *E : ReductionOps) { 5611 if ((*IPriv)->getType()->isArrayType()) { 5612 // Emit reduction for array section. 5613 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5614 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5615 EmitOMPAggregateReduction( 5616 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5617 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5618 emitReductionCombiner(CGF, E); 5619 }); 5620 } else { 5621 // Emit reduction for array subscript or single variable. 5622 emitReductionCombiner(CGF, E); 5623 } 5624 ++IPriv; 5625 ++ILHS; 5626 ++IRHS; 5627 } 5628 Scope.ForceCleanup(); 5629 CGF.FinishFunction(); 5630 return Fn; 5631 } 5632 5633 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5634 const Expr *ReductionOp, 5635 const Expr *PrivateRef, 5636 const DeclRefExpr *LHS, 5637 const DeclRefExpr *RHS) { 5638 if (PrivateRef->getType()->isArrayType()) { 5639 // Emit reduction for array section. 5640 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5641 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5642 EmitOMPAggregateReduction( 5643 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5644 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5645 emitReductionCombiner(CGF, ReductionOp); 5646 }); 5647 } else { 5648 // Emit reduction for array subscript or single variable. 5649 emitReductionCombiner(CGF, ReductionOp); 5650 } 5651 } 5652 5653 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5654 ArrayRef<const Expr *> Privates, 5655 ArrayRef<const Expr *> LHSExprs, 5656 ArrayRef<const Expr *> RHSExprs, 5657 ArrayRef<const Expr *> ReductionOps, 5658 ReductionOptionsTy Options) { 5659 if (!CGF.HaveInsertPoint()) 5660 return; 5661 5662 bool WithNowait = Options.WithNowait; 5663 bool SimpleReduction = Options.SimpleReduction; 5664 5665 // Next code should be emitted for reduction: 5666 // 5667 // static kmp_critical_name lock = { 0 }; 5668 // 5669 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5670 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5671 // ... 5672 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5673 // *(Type<n>-1*)rhs[<n>-1]); 5674 // } 5675 // 5676 // ... 5677 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5678 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5679 // RedList, reduce_func, &<lock>)) { 5680 // case 1: 5681 // ... 5682 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5683 // ... 5684 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5685 // break; 5686 // case 2: 5687 // ... 5688 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5689 // ... 
5690 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5691 // break; 5692 // default:; 5693 // } 5694 // 5695 // if SimpleReduction is true, only the next code is generated: 5696 // ... 5697 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5698 // ... 5699 5700 ASTContext &C = CGM.getContext(); 5701 5702 if (SimpleReduction) { 5703 CodeGenFunction::RunCleanupsScope Scope(CGF); 5704 auto IPriv = Privates.begin(); 5705 auto ILHS = LHSExprs.begin(); 5706 auto IRHS = RHSExprs.begin(); 5707 for (const Expr *E : ReductionOps) { 5708 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5709 cast<DeclRefExpr>(*IRHS)); 5710 ++IPriv; 5711 ++ILHS; 5712 ++IRHS; 5713 } 5714 return; 5715 } 5716 5717 // 1. Build a list of reduction variables. 5718 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5719 auto Size = RHSExprs.size(); 5720 for (const Expr *E : Privates) { 5721 if (E->getType()->isVariablyModifiedType()) 5722 // Reserve place for array size. 5723 ++Size; 5724 } 5725 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5726 QualType ReductionArrayTy = 5727 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5728 /*IndexTypeQuals=*/0); 5729 Address ReductionList = 5730 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5731 auto IPriv = Privates.begin(); 5732 unsigned Idx = 0; 5733 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5734 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5735 CGF.Builder.CreateStore( 5736 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5737 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5738 Elem); 5739 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5740 // Store array size. 5741 ++Idx; 5742 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5743 llvm::Value *Size = CGF.Builder.CreateIntCast( 5744 CGF.getVLASize( 5745 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5746 .NumElts, 5747 CGF.SizeTy, /*isSigned=*/false); 5748 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5749 Elem); 5750 } 5751 } 5752 5753 // 2. Emit reduce_func(). 5754 llvm::Function *ReductionFn = emitReductionFunction( 5755 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5756 LHSExprs, RHSExprs, ReductionOps); 5757 5758 // 3. Create static kmp_critical_name lock = { 0 }; 5759 std::string Name = getName({"reduction"}); 5760 llvm::Value *Lock = getCriticalRegionLock(Name); 5761 5762 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5763 // RedList, reduce_func, &<lock>); 5764 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5765 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5766 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5767 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5768 ReductionList.getPointer(), CGF.VoidPtrTy); 5769 llvm::Value *Args[] = { 5770 IdentTLoc, // ident_t *<loc> 5771 ThreadId, // i32 <gtid> 5772 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5773 ReductionArrayTySize, // size_type sizeof(RedList) 5774 RL, // void *RedList 5775 ReductionFn, // void (*) (void *, void *) <reduce_func> 5776 Lock // kmp_critical_name *&<lock> 5777 }; 5778 llvm::Value *Res = CGF.EmitRuntimeCall( 5779 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5780 : OMPRTL__kmpc_reduce), 5781 Args); 5782 5783 // 5. 
Build switch(res) 5784 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5785 llvm::SwitchInst *SwInst = 5786 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5787 5788 // 6. Build case 1: 5789 // ... 5790 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5791 // ... 5792 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5793 // break; 5794 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5795 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5796 CGF.EmitBlock(Case1BB); 5797 5798 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5799 llvm::Value *EndArgs[] = { 5800 IdentTLoc, // ident_t *<loc> 5801 ThreadId, // i32 <gtid> 5802 Lock // kmp_critical_name *&<lock> 5803 }; 5804 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5805 CodeGenFunction &CGF, PrePostActionTy &Action) { 5806 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5807 auto IPriv = Privates.begin(); 5808 auto ILHS = LHSExprs.begin(); 5809 auto IRHS = RHSExprs.begin(); 5810 for (const Expr *E : ReductionOps) { 5811 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5812 cast<DeclRefExpr>(*IRHS)); 5813 ++IPriv; 5814 ++ILHS; 5815 ++IRHS; 5816 } 5817 }; 5818 RegionCodeGenTy RCG(CodeGen); 5819 CommonActionTy Action( 5820 nullptr, llvm::None, 5821 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5822 : OMPRTL__kmpc_end_reduce), 5823 EndArgs); 5824 RCG.setAction(Action); 5825 RCG(CGF); 5826 5827 CGF.EmitBranch(DefaultBB); 5828 5829 // 7. Build case 2: 5830 // ... 5831 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5832 // ... 5833 // break; 5834 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5835 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5836 CGF.EmitBlock(Case2BB); 5837 5838 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5839 CodeGenFunction &CGF, PrePostActionTy &Action) { 5840 auto ILHS = LHSExprs.begin(); 5841 auto IRHS = RHSExprs.begin(); 5842 auto IPriv = Privates.begin(); 5843 for (const Expr *E : ReductionOps) { 5844 const Expr *XExpr = nullptr; 5845 const Expr *EExpr = nullptr; 5846 const Expr *UpExpr = nullptr; 5847 BinaryOperatorKind BO = BO_Comma; 5848 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5849 if (BO->getOpcode() == BO_Assign) { 5850 XExpr = BO->getLHS(); 5851 UpExpr = BO->getRHS(); 5852 } 5853 } 5854 // Try to emit update expression as a simple atomic. 5855 const Expr *RHSExpr = UpExpr; 5856 if (RHSExpr) { 5857 // Analyze RHS part of the whole expression. 5858 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5859 RHSExpr->IgnoreParenImpCasts())) { 5860 // If this is a conditional operator, analyze its condition for 5861 // min/max reduction operator. 
5862 RHSExpr = ACO->getCond(); 5863 } 5864 if (const auto *BORHS = 5865 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5866 EExpr = BORHS->getRHS(); 5867 BO = BORHS->getOpcode(); 5868 } 5869 } 5870 if (XExpr) { 5871 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5872 auto &&AtomicRedGen = [BO, VD, 5873 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5874 const Expr *EExpr, const Expr *UpExpr) { 5875 LValue X = CGF.EmitLValue(XExpr); 5876 RValue E; 5877 if (EExpr) 5878 E = CGF.EmitAnyExpr(EExpr); 5879 CGF.EmitOMPAtomicSimpleUpdateExpr( 5880 X, E, BO, /*IsXLHSInRHSPart=*/true, 5881 llvm::AtomicOrdering::Monotonic, Loc, 5882 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5883 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5884 PrivateScope.addPrivate( 5885 VD, [&CGF, VD, XRValue, Loc]() { 5886 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5887 CGF.emitOMPSimpleStore( 5888 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5889 VD->getType().getNonReferenceType(), Loc); 5890 return LHSTemp; 5891 }); 5892 (void)PrivateScope.Privatize(); 5893 return CGF.EmitAnyExpr(UpExpr); 5894 }); 5895 }; 5896 if ((*IPriv)->getType()->isArrayType()) { 5897 // Emit atomic reduction for array section. 5898 const auto *RHSVar = 5899 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5900 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5901 AtomicRedGen, XExpr, EExpr, UpExpr); 5902 } else { 5903 // Emit atomic reduction for array subscript or single variable. 5904 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5905 } 5906 } else { 5907 // Emit as a critical region. 5908 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5909 const Expr *, const Expr *) { 5910 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5911 std::string Name = RT.getName({"atomic_reduction"}); 5912 RT.emitCriticalRegion( 5913 CGF, Name, 5914 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5915 Action.Enter(CGF); 5916 emitReductionCombiner(CGF, E); 5917 }, 5918 Loc); 5919 }; 5920 if ((*IPriv)->getType()->isArrayType()) { 5921 const auto *LHSVar = 5922 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5923 const auto *RHSVar = 5924 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5925 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5926 CritRedGen); 5927 } else { 5928 CritRedGen(CGF, nullptr, nullptr, nullptr); 5929 } 5930 } 5931 ++ILHS; 5932 ++IRHS; 5933 ++IPriv; 5934 } 5935 }; 5936 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5937 if (!WithNowait) { 5938 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5939 llvm::Value *EndArgs[] = { 5940 IdentTLoc, // ident_t *<loc> 5941 ThreadId, // i32 <gtid> 5942 Lock // kmp_critical_name *&<lock> 5943 }; 5944 CommonActionTy Action(nullptr, llvm::None, 5945 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5946 EndArgs); 5947 AtomicRCG.setAction(Action); 5948 AtomicRCG(CGF); 5949 } else { 5950 AtomicRCG(CGF); 5951 } 5952 5953 CGF.EmitBranch(DefaultBB); 5954 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5955 } 5956 5957 /// Generates unique name for artificial threadprivate variables. 5958 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5959 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5960 const Expr *Ref) { 5961 SmallString<256> Buffer; 5962 llvm::raw_svector_ostream Out(Buffer); 5963 const clang::DeclRefExpr *DE; 5964 const VarDecl *D = ::getBaseDecl(Ref, DE); 5965 if (!D) 5966 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5967 D = D->getCanonicalDecl(); 5968 std::string Name = CGM.getOpenMPRuntime().getName( 5969 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5970 Out << Prefix << Name << "_" 5971 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5972 return Out.str(); 5973 } 5974 5975 /// Emits reduction initializer function: 5976 /// \code 5977 /// void @.red_init(void* %arg) { 5978 /// %0 = bitcast void* %arg to <type>* 5979 /// store <type> <init>, <type>* %0 5980 /// ret void 5981 /// } 5982 /// \endcode 5983 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5984 SourceLocation Loc, 5985 ReductionCodeGen &RCG, unsigned N) { 5986 ASTContext &C = CGM.getContext(); 5987 FunctionArgList Args; 5988 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5989 ImplicitParamDecl::Other); 5990 Args.emplace_back(&Param); 5991 const auto &FnInfo = 5992 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5993 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5994 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5995 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5996 Name, &CGM.getModule()); 5997 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5998 Fn->setDoesNotRecurse(); 5999 CodeGenFunction CGF(CGM); 6000 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6001 Address PrivateAddr = CGF.EmitLoadOfPointer( 6002 CGF.GetAddrOfLocalVar(&Param), 6003 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6004 llvm::Value *Size = nullptr; 6005 // If the size of the reduction item is non-constant, load it from global 6006 // threadprivate variable. 
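// For illustration: for a VLA 'int a[n]' appearing in a task reduction clause,
// emitTaskReductionFixups() below stores n * sizeof(int) into an artificial
// threadprivate global whose name generateUniqueName() builds from the
// "reduction_size" prefix, the declaration name, and its source location; the
// generated @.red_init function reloads that value here to compute the private
// item's type and size.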
6007 if (RCG.getSizes(N).second) {
6008 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6009 CGF, CGM.getContext().getSizeType(),
6010 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6011 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6012 CGM.getContext().getSizeType(), Loc);
6013 }
6014 RCG.emitAggregateType(CGF, N, Size);
6015 LValue SharedLVal;
6016 // If the initializer uses the initializer from the declare reduction
6017 // construct, emit a pointer to the address of the original reduction item
6018 // (required by the reduction initializer).
6019 if (RCG.usesReductionInitializer(N)) {
6020 Address SharedAddr =
6021 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6022 CGF, CGM.getContext().VoidPtrTy,
6023 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6024 SharedAddr = CGF.EmitLoadOfPointer(
6025 SharedAddr,
6026 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6027 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6028 } else {
6029 SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6030 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6031 CGM.getContext().VoidPtrTy);
6032 }
6033 // Emit the initializer:
6034 // %0 = bitcast void* %arg to <type>*
6035 // store <type> <init>, <type>* %0
6036 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6037 [](CodeGenFunction &) { return false; });
6038 CGF.FinishFunction();
6039 return Fn;
6040 }
6041
6042 /// Emits reduction combiner function:
6043 /// \code
6044 /// void @.red_comb(void* %arg0, void* %arg1) {
6045 /// %lhs = bitcast void* %arg0 to <type>*
6046 /// %rhs = bitcast void* %arg1 to <type>*
6047 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6048 /// store <type> %2, <type>* %lhs
6049 /// ret void
6050 /// }
6051 /// \endcode
6052 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6053 SourceLocation Loc,
6054 ReductionCodeGen &RCG, unsigned N,
6055 const Expr *ReductionOp,
6056 const Expr *LHS, const Expr *RHS,
6057 const Expr *PrivateRef) {
6058 ASTContext &C = CGM.getContext();
6059 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6060 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6061 FunctionArgList Args;
6062 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6063 C.VoidPtrTy, ImplicitParamDecl::Other);
6064 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6065 ImplicitParamDecl::Other);
6066 Args.emplace_back(&ParamInOut);
6067 Args.emplace_back(&ParamIn);
6068 const auto &FnInfo =
6069 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6070 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6071 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6072 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6073 Name, &CGM.getModule());
6074 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6075 Fn->setDoesNotRecurse();
6076 CodeGenFunction CGF(CGM);
6077 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6078 llvm::Value *Size = nullptr;
6079 // If the size of the reduction item is non-constant, load it from global
6080 // threadprivate variable.
6081 if (RCG.getSizes(N).second) { 6082 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6083 CGF, CGM.getContext().getSizeType(), 6084 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6085 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6086 CGM.getContext().getSizeType(), Loc); 6087 } 6088 RCG.emitAggregateType(CGF, N, Size); 6089 // Remap lhs and rhs variables to the addresses of the function arguments. 6090 // %lhs = bitcast void* %arg0 to <type>* 6091 // %rhs = bitcast void* %arg1 to <type>* 6092 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6093 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6094 // Pull out the pointer to the variable. 6095 Address PtrAddr = CGF.EmitLoadOfPointer( 6096 CGF.GetAddrOfLocalVar(&ParamInOut), 6097 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6098 return CGF.Builder.CreateElementBitCast( 6099 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6100 }); 6101 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6102 // Pull out the pointer to the variable. 6103 Address PtrAddr = CGF.EmitLoadOfPointer( 6104 CGF.GetAddrOfLocalVar(&ParamIn), 6105 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6106 return CGF.Builder.CreateElementBitCast( 6107 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6108 }); 6109 PrivateScope.Privatize(); 6110 // Emit the combiner body: 6111 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6112 // store <type> %2, <type>* %lhs 6113 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6114 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6115 cast<DeclRefExpr>(RHS)); 6116 CGF.FinishFunction(); 6117 return Fn; 6118 } 6119 6120 /// Emits reduction finalizer function: 6121 /// \code 6122 /// void @.red_fini(void* %arg) { 6123 /// %0 = bitcast void* %arg to <type>* 6124 /// <destroy>(<type>* %0) 6125 /// ret void 6126 /// } 6127 /// \endcode 6128 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6129 SourceLocation Loc, 6130 ReductionCodeGen &RCG, unsigned N) { 6131 if (!RCG.needCleanups(N)) 6132 return nullptr; 6133 ASTContext &C = CGM.getContext(); 6134 FunctionArgList Args; 6135 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6136 ImplicitParamDecl::Other); 6137 Args.emplace_back(&Param); 6138 const auto &FnInfo = 6139 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6140 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6141 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6142 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6143 Name, &CGM.getModule()); 6144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6145 Fn->setDoesNotRecurse(); 6146 CodeGenFunction CGF(CGM); 6147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6148 Address PrivateAddr = CGF.EmitLoadOfPointer( 6149 CGF.GetAddrOfLocalVar(&Param), 6150 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6151 llvm::Value *Size = nullptr; 6152 // If the size of the reduction item is non-constant, load it from global 6153 // threadprivate variable. 
6154 if (RCG.getSizes(N).second) { 6155 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6156 CGF, CGM.getContext().getSizeType(), 6157 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6158 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6159 CGM.getContext().getSizeType(), Loc); 6160 } 6161 RCG.emitAggregateType(CGF, N, Size); 6162 // Emit the finalizer body: 6163 // <destroy>(<type>* %0) 6164 RCG.emitCleanups(CGF, N, PrivateAddr); 6165 CGF.FinishFunction(Loc); 6166 return Fn; 6167 } 6168 6169 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6170 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6171 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6172 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6173 return nullptr; 6174 6175 // Build typedef struct: 6176 // kmp_task_red_input { 6177 // void *reduce_shar; // shared reduction item 6178 // size_t reduce_size; // size of data item 6179 // void *reduce_init; // data initialization routine 6180 // void *reduce_fini; // data finalization routine 6181 // void *reduce_comb; // data combiner routine 6182 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6183 // } kmp_task_red_input_t; 6184 ASTContext &C = CGM.getContext(); 6185 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6186 RD->startDefinition(); 6187 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6188 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6189 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6190 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6191 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6192 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6193 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6194 RD->completeDefinition(); 6195 QualType RDType = C.getRecordType(RD); 6196 unsigned Size = Data.ReductionVars.size(); 6197 llvm::APInt ArraySize(/*numBits=*/64, Size); 6198 QualType ArrayRDType = C.getConstantArrayType( 6199 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6200 // kmp_task_red_input_t .rd_input.[Size]; 6201 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6202 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6203 Data.ReductionOps); 6204 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6205 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6206 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6207 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6208 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6209 TaskRedInput.getPointer(), Idxs, 6210 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6211 ".rd_input.gep."); 6212 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6213 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6214 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6215 RCG.emitSharedLValue(CGF, Cnt); 6216 llvm::Value *CastedShared = 6217 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6218 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6219 RCG.emitAggregateType(CGF, Cnt); 6220 llvm::Value *SizeValInChars; 6221 llvm::Value *SizeVal; 6222 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6223 // We use delayed creation/initialization for VLAs, array sections and 6224 // custom reduction initializations. 
This is required because the runtime does
6225 // not provide a way to pass the sizes of VLAs/array sections to the
6226 // initializer/combiner/finalizer functions, and does not pass the pointer to
6227 // the original reduction item to the initializer. Instead, threadprivate
6228 // global variables are used to store these values and read them back there.
6229 bool DelayedCreation = !!SizeVal;
6230 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6231 /*isSigned=*/false);
6232 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6233 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6234 // ElemLVal.reduce_init = init;
6235 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6236 llvm::Value *InitAddr =
6237 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6238 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6239 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6240 // ElemLVal.reduce_fini = fini;
6241 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6242 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6243 llvm::Value *FiniAddr = Fini
6244 ? CGF.EmitCastToVoidPtr(Fini)
6245 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6246 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6247 // ElemLVal.reduce_comb = comb;
6248 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6249 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6250 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6251 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6252 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6253 // ElemLVal.flags = 0;
6254 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6255 if (DelayedCreation) {
6256 CGF.EmitStoreOfScalar(
6257 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6258 FlagsLVal);
6259 } else
6260 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6261 FlagsLVal.getType());
6262 }
6263 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6264 // *data);
6265 llvm::Value *Args[] = {
6266 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6267 /*isSigned=*/true),
6268 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6270 CGM.VoidPtrTy)};
6271 return CGF.EmitRuntimeCall(
6272 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6273 }
6274
6275 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6276 SourceLocation Loc,
6277 ReductionCodeGen &RCG,
6278 unsigned N) {
6279 auto Sizes = RCG.getSizes(N);
6280 // Emit a threadprivate global variable if the type is non-constant
6281 // (Sizes.second != nullptr).
6282 if (Sizes.second) {
6283 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6284 /*isSigned=*/false);
6285 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6286 CGF, CGM.getContext().getSizeType(),
6287 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6288 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6289 }
6290 // Store the address of the original reduction item if a custom initializer is used.
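// (This pairs with emitReduceInitFunction() above: when the UDR initializer
// references the original item via omp_orig, the generated @.red_init function
// reloads this address from the same artificial threadprivate variable.)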
6291 if (RCG.usesReductionInitializer(N)) { 6292 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6293 CGF, CGM.getContext().VoidPtrTy, 6294 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6295 CGF.Builder.CreateStore( 6296 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6297 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), 6298 SharedAddr, /*IsVolatile=*/false); 6299 } 6300 } 6301 6302 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6303 SourceLocation Loc, 6304 llvm::Value *ReductionsPtr, 6305 LValue SharedLVal) { 6306 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6307 // *d); 6308 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6309 CGM.IntTy, 6310 /*isSigned=*/true), 6311 ReductionsPtr, 6312 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6313 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6314 return Address( 6315 CGF.EmitRuntimeCall( 6316 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6317 SharedLVal.getAlignment()); 6318 } 6319 6320 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6321 SourceLocation Loc) { 6322 if (!CGF.HaveInsertPoint()) 6323 return; 6324 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6325 // global_tid); 6326 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6327 // Ignore return result until untied tasks are supported. 6328 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6329 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6330 Region->emitUntiedSwitch(CGF); 6331 } 6332 6333 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6334 OpenMPDirectiveKind InnerKind, 6335 const RegionCodeGenTy &CodeGen, 6336 bool HasCancel) { 6337 if (!CGF.HaveInsertPoint()) 6338 return; 6339 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6340 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6341 } 6342 6343 namespace { 6344 enum RTCancelKind { 6345 CancelNoreq = 0, 6346 CancelParallel = 1, 6347 CancelLoop = 2, 6348 CancelSections = 3, 6349 CancelTaskgroup = 4 6350 }; 6351 } // anonymous namespace 6352 6353 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6354 RTCancelKind CancelKind = CancelNoreq; 6355 if (CancelRegion == OMPD_parallel) 6356 CancelKind = CancelParallel; 6357 else if (CancelRegion == OMPD_for) 6358 CancelKind = CancelLoop; 6359 else if (CancelRegion == OMPD_sections) 6360 CancelKind = CancelSections; 6361 else { 6362 assert(CancelRegion == OMPD_taskgroup); 6363 CancelKind = CancelTaskgroup; 6364 } 6365 return CancelKind; 6366 } 6367 6368 void CGOpenMPRuntime::emitCancellationPointCall( 6369 CodeGenFunction &CGF, SourceLocation Loc, 6370 OpenMPDirectiveKind CancelRegion) { 6371 if (!CGF.HaveInsertPoint()) 6372 return; 6373 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6374 // global_tid, kmp_int32 cncl_kind); 6375 if (auto *OMPRegionInfo = 6376 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6377 // For 'cancellation point taskgroup', the task region info may not have a 6378 // cancel. This may instead happen in another adjacent task. 6379 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6380 llvm::Value *Args[] = { 6381 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6382 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6383 // Ignore return result until untied tasks are supported. 
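// A sketch of the IR emitted below (illustrative; value names may differ):
//   %0 = call i32 @__kmpc_cancellationpoint(%struct.ident_t* %loc, i32 %gtid, i32 <cncl_kind>)
//   %1 = icmp ne i32 %0, 0
//   br i1 %1, label %.cancel.exit, label %.cancel.continue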
6384 llvm::Value *Result = CGF.EmitRuntimeCall( 6385 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6386 // if (__kmpc_cancellationpoint()) { 6387 // exit from construct; 6388 // } 6389 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6390 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6391 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6392 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6393 CGF.EmitBlock(ExitBB); 6394 // exit from construct; 6395 CodeGenFunction::JumpDest CancelDest = 6396 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6397 CGF.EmitBranchThroughCleanup(CancelDest); 6398 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6399 } 6400 } 6401 } 6402 6403 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6404 const Expr *IfCond, 6405 OpenMPDirectiveKind CancelRegion) { 6406 if (!CGF.HaveInsertPoint()) 6407 return; 6408 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6409 // kmp_int32 cncl_kind); 6410 if (auto *OMPRegionInfo = 6411 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6412 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6413 PrePostActionTy &) { 6414 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6415 llvm::Value *Args[] = { 6416 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6417 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6418 // Ignore return result until untied tasks are supported. 6419 llvm::Value *Result = CGF.EmitRuntimeCall( 6420 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6421 // if (__kmpc_cancel()) { 6422 // exit from construct; 6423 // } 6424 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6425 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6426 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6427 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6428 CGF.EmitBlock(ExitBB); 6429 // exit from construct; 6430 CodeGenFunction::JumpDest CancelDest = 6431 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6432 CGF.EmitBranchThroughCleanup(CancelDest); 6433 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6434 }; 6435 if (IfCond) { 6436 emitIfClause(CGF, IfCond, ThenGen, 6437 [](CodeGenFunction &, PrePostActionTy &) {}); 6438 } else { 6439 RegionCodeGenTy ThenRCG(ThenGen); 6440 ThenRCG(CGF); 6441 } 6442 } 6443 } 6444 6445 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6446 const OMPExecutableDirective &D, StringRef ParentName, 6447 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6448 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6449 assert(!ParentName.empty() && "Invalid target region parent name!"); 6450 HasEmittedTargetRegion = true; 6451 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6452 IsOffloadEntry, CodeGen); 6453 } 6454 6455 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6456 const OMPExecutableDirective &D, StringRef ParentName, 6457 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6458 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6459 // Create a unique name for the entry function using the source location 6460 // information of the current target region. 
The name will be something like:
6461 //
6462 // __omp_offloading_DD_FFFF_PP_lBB
6463 //
6464 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6465 // mangled name of the function that encloses the target region and BB is the
6466 // line number of the target region.
6467
6468 unsigned DeviceID;
6469 unsigned FileID;
6470 unsigned Line;
6471 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6472 Line);
6473 SmallString<64> EntryFnName;
6474 {
6475 llvm::raw_svector_ostream OS(EntryFnName);
6476 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6477 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6478 }
6479
6480 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6481
6482 CodeGenFunction CGF(CGM, true);
6483 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6484 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6485
6486 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6487
6488 // If this target outlined function is not an offload entry, we don't need to
6489 // register it.
6490 if (!IsOffloadEntry)
6491 return;
6492
6493 // The target region ID is used by the runtime library to identify the current
6494 // target region, so it only has to be unique and not necessarily point to
6495 // anything. It could be the pointer to the outlined function that implements
6496 // the target region, but we aren't using that, so that the compiler doesn't
6497 // need to keep it around and can therefore inline the host function if proven
6498 // worthwhile during optimization. On the other hand, if emitting code for the
6499 // device, the ID has to be the function address so that it can be retrieved
6500 // from the offloading entry and launched by the runtime library. We also mark
6501 // the outlined function with external linkage in case we are emitting code
6502 // for the device, because these functions will be entry points to the device.
6503
6504 if (CGM.getLangOpts().OpenMPIsDevice) {
6505 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6506 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6507 OutlinedFn->setDSOLocal(false);
6508 } else {
6509 std::string Name = getName({EntryFnName, "region_id"});
6510 OutlinedFnID = new llvm::GlobalVariable(
6511 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6512 llvm::GlobalValue::WeakAnyLinkage,
6513 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6514 }
6515
6516 // Register the information for the entry associated with this target region.
6517 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6518 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6519 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6520 }
6521
6522 /// Checks if the expression is constant or does not have non-trivial function
6523 /// calls.
6524 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6525 // We can skip constant expressions.
6526 // We can skip expressions with trivial calls or simple expressions.
6527 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6528 !E->hasNonTrivialCall(Ctx)) && 6529 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6530 } 6531 6532 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6533 const Stmt *Body) { 6534 const Stmt *Child = Body->IgnoreContainers(); 6535 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6536 Child = nullptr; 6537 for (const Stmt *S : C->body()) { 6538 if (const auto *E = dyn_cast<Expr>(S)) { 6539 if (isTrivial(Ctx, E)) 6540 continue; 6541 } 6542 // Some of the statements can be ignored. 6543 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6544 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6545 continue; 6546 // Analyze declarations. 6547 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6548 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6549 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6550 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6551 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6552 isa<UsingDirectiveDecl>(D) || 6553 isa<OMPDeclareReductionDecl>(D) || 6554 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6555 return true; 6556 const auto *VD = dyn_cast<VarDecl>(D); 6557 if (!VD) 6558 return false; 6559 return VD->isConstexpr() || 6560 ((VD->getType().isTrivialType(Ctx) || 6561 VD->getType()->isReferenceType()) && 6562 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6563 })) 6564 continue; 6565 } 6566 // Found multiple children - cannot get the one child only. 6567 if (Child) 6568 return nullptr; 6569 Child = S; 6570 } 6571 if (Child) 6572 Child = Child->IgnoreContainers(); 6573 } 6574 return Child; 6575 } 6576 6577 /// Emit the number of teams for a target directive. Inspect the num_teams 6578 /// clause associated with a teams construct combined or closely nested 6579 /// with the target directive. 6580 /// 6581 /// Emit a team of size one for directives such as 'target parallel' that 6582 /// have no associated teams construct. 6583 /// 6584 /// Otherwise, return nullptr. 
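/// For example (illustrative):
/// \code
/// #pragma omp target teams num_teams(8)
/// \endcode
/// emits the value 8, 'target parallel' emits 1, and a plain 'target' whose
/// nested teams directive cannot be analyzed yields nullptr.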
6585 static llvm::Value * 6586 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6587 const OMPExecutableDirective &D) { 6588 assert(!CGF.getLangOpts().OpenMPIsDevice && 6589 "Clauses associated with the teams directive expected to be emitted " 6590 "only for the host!"); 6591 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6592 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6593 "Expected target-based executable directive."); 6594 CGBuilderTy &Bld = CGF.Builder; 6595 switch (DirectiveKind) { 6596 case OMPD_target: { 6597 const auto *CS = D.getInnermostCapturedStmt(); 6598 const auto *Body = 6599 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6600 const Stmt *ChildStmt = 6601 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6602 if (const auto *NestedDir = 6603 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6604 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6605 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6606 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6607 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6608 const Expr *NumTeams = 6609 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6610 llvm::Value *NumTeamsVal = 6611 CGF.EmitScalarExpr(NumTeams, 6612 /*IgnoreResultAssign*/ true); 6613 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6614 /*isSigned=*/true); 6615 } 6616 return Bld.getInt32(0); 6617 } 6618 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6619 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6620 return Bld.getInt32(1); 6621 return Bld.getInt32(0); 6622 } 6623 return nullptr; 6624 } 6625 case OMPD_target_teams: 6626 case OMPD_target_teams_distribute: 6627 case OMPD_target_teams_distribute_simd: 6628 case OMPD_target_teams_distribute_parallel_for: 6629 case OMPD_target_teams_distribute_parallel_for_simd: { 6630 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6631 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6632 const Expr *NumTeams = 6633 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6634 llvm::Value *NumTeamsVal = 6635 CGF.EmitScalarExpr(NumTeams, 6636 /*IgnoreResultAssign*/ true); 6637 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6638 /*isSigned=*/true); 6639 } 6640 return Bld.getInt32(0); 6641 } 6642 case OMPD_target_parallel: 6643 case OMPD_target_parallel_for: 6644 case OMPD_target_parallel_for_simd: 6645 case OMPD_target_simd: 6646 return Bld.getInt32(1); 6647 case OMPD_parallel: 6648 case OMPD_for: 6649 case OMPD_parallel_for: 6650 case OMPD_parallel_master: 6651 case OMPD_parallel_sections: 6652 case OMPD_for_simd: 6653 case OMPD_parallel_for_simd: 6654 case OMPD_cancel: 6655 case OMPD_cancellation_point: 6656 case OMPD_ordered: 6657 case OMPD_threadprivate: 6658 case OMPD_allocate: 6659 case OMPD_task: 6660 case OMPD_simd: 6661 case OMPD_sections: 6662 case OMPD_section: 6663 case OMPD_single: 6664 case OMPD_master: 6665 case OMPD_critical: 6666 case OMPD_taskyield: 6667 case OMPD_barrier: 6668 case OMPD_taskwait: 6669 case OMPD_taskgroup: 6670 case OMPD_atomic: 6671 case OMPD_flush: 6672 case OMPD_teams: 6673 case OMPD_target_data: 6674 case OMPD_target_exit_data: 6675 case OMPD_target_enter_data: 6676 case OMPD_distribute: 6677 case OMPD_distribute_simd: 6678 case OMPD_distribute_parallel_for: 6679 case OMPD_distribute_parallel_for_simd: 6680 case OMPD_teams_distribute: 6681 case OMPD_teams_distribute_simd: 6682 case OMPD_teams_distribute_parallel_for: 6683 case 
OMPD_teams_distribute_parallel_for_simd:
6684 case OMPD_target_update:
6685 case OMPD_declare_simd:
6686 case OMPD_declare_variant:
6687 case OMPD_declare_target:
6688 case OMPD_end_declare_target:
6689 case OMPD_declare_reduction:
6690 case OMPD_declare_mapper:
6691 case OMPD_taskloop:
6692 case OMPD_taskloop_simd:
6693 case OMPD_master_taskloop:
6694 case OMPD_master_taskloop_simd:
6695 case OMPD_parallel_master_taskloop:
6696 case OMPD_parallel_master_taskloop_simd:
6697 case OMPD_requires:
6698 case OMPD_unknown:
6699 break;
6700 }
6701 llvm_unreachable("Unexpected directive kind.");
6702 }
6703
6704 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6705 llvm::Value *DefaultThreadLimitVal) {
6706 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6707 CGF.getContext(), CS->getCapturedStmt());
6708 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6709 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6710 llvm::Value *NumThreads = nullptr;
6711 llvm::Value *CondVal = nullptr;
6712 // Handle the if clause. If an if clause is present, the number of threads
6713 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6714 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6715 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6716 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6717 const OMPIfClause *IfClause = nullptr;
6718 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6719 if (C->getNameModifier() == OMPD_unknown ||
6720 C->getNameModifier() == OMPD_parallel) {
6721 IfClause = C;
6722 break;
6723 }
6724 }
6725 if (IfClause) {
6726 const Expr *Cond = IfClause->getCondition();
6727 bool Result;
6728 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6729 if (!Result)
6730 return CGF.Builder.getInt32(1);
6731 } else {
6732 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6733 if (const auto *PreInit =
6734 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6735 for (const auto *I : PreInit->decls()) {
6736 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6737 CGF.EmitVarDecl(cast<VarDecl>(*I));
6738 } else {
6739 CodeGenFunction::AutoVarEmission Emission =
6740 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6741 CGF.EmitAutoVarCleanups(Emission);
6742 }
6743 }
6744 }
6745 CondVal = CGF.EvaluateExprAsBool(Cond);
6746 }
6747 }
6748 }
6749 // Check the value of the num_threads clause only if the if clause was not
6750 // specified or does not evaluate to false.
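// For example (illustrative): for '#pragma omp parallel if(c) num_threads(8)'
// nested in a target region with default thread limit TL, the value emitted
// below is c ? min(8, TL) : 1 (the min comes from the select over the ULT
// compare against DefaultThreadLimitVal).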
6751 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6752 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6753 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6754 const auto *NumThreadsClause = 6755 Dir->getSingleClause<OMPNumThreadsClause>(); 6756 CodeGenFunction::LexicalScope Scope( 6757 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6758 if (const auto *PreInit = 6759 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6760 for (const auto *I : PreInit->decls()) { 6761 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6762 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6763 } else { 6764 CodeGenFunction::AutoVarEmission Emission = 6765 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6766 CGF.EmitAutoVarCleanups(Emission); 6767 } 6768 } 6769 } 6770 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6771 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6772 /*isSigned=*/false); 6773 if (DefaultThreadLimitVal) 6774 NumThreads = CGF.Builder.CreateSelect( 6775 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6776 DefaultThreadLimitVal, NumThreads); 6777 } else { 6778 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6779 : CGF.Builder.getInt32(0); 6780 } 6781 // Process condition of the if clause. 6782 if (CondVal) { 6783 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6784 CGF.Builder.getInt32(1)); 6785 } 6786 return NumThreads; 6787 } 6788 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6789 return CGF.Builder.getInt32(1); 6790 return DefaultThreadLimitVal; 6791 } 6792 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6793 : CGF.Builder.getInt32(0); 6794 } 6795 6796 /// Emit the number of threads for a target directive. Inspect the 6797 /// thread_limit clause associated with a teams construct combined or closely 6798 /// nested with the target directive. 6799 /// 6800 /// Emit the num_threads clause for directives such as 'target parallel' that 6801 /// have no associated teams construct. 6802 /// 6803 /// Otherwise, return nullptr. 
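/// For example (illustrative):
/// \code
/// #pragma omp target teams thread_limit(64)
/// \endcode
/// emits 64 (possibly tightened by a nested parallel region's num_threads),
/// while 'target simd' emits 1.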
6804 static llvm::Value * 6805 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6806 const OMPExecutableDirective &D) { 6807 assert(!CGF.getLangOpts().OpenMPIsDevice && 6808 "Clauses associated with the teams directive expected to be emitted " 6809 "only for the host!"); 6810 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6811 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6812 "Expected target-based executable directive."); 6813 CGBuilderTy &Bld = CGF.Builder; 6814 llvm::Value *ThreadLimitVal = nullptr; 6815 llvm::Value *NumThreadsVal = nullptr; 6816 switch (DirectiveKind) { 6817 case OMPD_target: { 6818 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6819 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6820 return NumThreads; 6821 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6822 CGF.getContext(), CS->getCapturedStmt()); 6823 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6824 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6825 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6827 const auto *ThreadLimitClause = 6828 Dir->getSingleClause<OMPThreadLimitClause>(); 6829 CodeGenFunction::LexicalScope Scope( 6830 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6831 if (const auto *PreInit = 6832 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6833 for (const auto *I : PreInit->decls()) { 6834 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6835 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6836 } else { 6837 CodeGenFunction::AutoVarEmission Emission = 6838 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6839 CGF.EmitAutoVarCleanups(Emission); 6840 } 6841 } 6842 } 6843 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6844 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6845 ThreadLimitVal = 6846 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6847 } 6848 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6849 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6850 CS = Dir->getInnermostCapturedStmt(); 6851 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6852 CGF.getContext(), CS->getCapturedStmt()); 6853 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6854 } 6855 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6856 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6857 CS = Dir->getInnermostCapturedStmt(); 6858 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6859 return NumThreads; 6860 } 6861 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6862 return Bld.getInt32(1); 6863 } 6864 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
6865 }
6866 case OMPD_target_teams: {
6867 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6868 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6869 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6870 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6871 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6872 ThreadLimitVal =
6873 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6874 }
6875 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6876 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6877 return NumThreads;
6878 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6879 CGF.getContext(), CS->getCapturedStmt());
6880 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6881 if (Dir->getDirectiveKind() == OMPD_distribute) {
6882 CS = Dir->getInnermostCapturedStmt();
6883 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6884 return NumThreads;
6885 }
6886 }
6887 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6888 }
6889 case OMPD_target_teams_distribute:
6890 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6891 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6892 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6893 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6894 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6895 ThreadLimitVal =
6896 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6897 }
6898 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6899 case OMPD_target_parallel:
6900 case OMPD_target_parallel_for:
6901 case OMPD_target_parallel_for_simd:
6902 case OMPD_target_teams_distribute_parallel_for:
6903 case OMPD_target_teams_distribute_parallel_for_simd: {
6904 llvm::Value *CondVal = nullptr;
6905 // Handle the if clause. If an if clause is present, the number of threads
6906 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6907 if (D.hasClausesOfKind<OMPIfClause>()) { 6908 const OMPIfClause *IfClause = nullptr; 6909 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6910 if (C->getNameModifier() == OMPD_unknown || 6911 C->getNameModifier() == OMPD_parallel) { 6912 IfClause = C; 6913 break; 6914 } 6915 } 6916 if (IfClause) { 6917 const Expr *Cond = IfClause->getCondition(); 6918 bool Result; 6919 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6920 if (!Result) 6921 return Bld.getInt32(1); 6922 } else { 6923 CodeGenFunction::RunCleanupsScope Scope(CGF); 6924 CondVal = CGF.EvaluateExprAsBool(Cond); 6925 } 6926 } 6927 } 6928 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6929 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6930 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6931 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6932 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6933 ThreadLimitVal = 6934 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6935 } 6936 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6937 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6938 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6939 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6940 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6941 NumThreadsVal = 6942 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6943 ThreadLimitVal = ThreadLimitVal 6944 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6945 ThreadLimitVal), 6946 NumThreadsVal, ThreadLimitVal) 6947 : NumThreadsVal; 6948 } 6949 if (!ThreadLimitVal) 6950 ThreadLimitVal = Bld.getInt32(0); 6951 if (CondVal) 6952 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6953 return ThreadLimitVal; 6954 } 6955 case OMPD_target_teams_distribute_simd: 6956 case OMPD_target_simd: 6957 return Bld.getInt32(1); 6958 case OMPD_parallel: 6959 case OMPD_for: 6960 case OMPD_parallel_for: 6961 case OMPD_parallel_master: 6962 case OMPD_parallel_sections: 6963 case OMPD_for_simd: 6964 case OMPD_parallel_for_simd: 6965 case OMPD_cancel: 6966 case OMPD_cancellation_point: 6967 case OMPD_ordered: 6968 case OMPD_threadprivate: 6969 case OMPD_allocate: 6970 case OMPD_task: 6971 case OMPD_simd: 6972 case OMPD_sections: 6973 case OMPD_section: 6974 case OMPD_single: 6975 case OMPD_master: 6976 case OMPD_critical: 6977 case OMPD_taskyield: 6978 case OMPD_barrier: 6979 case OMPD_taskwait: 6980 case OMPD_taskgroup: 6981 case OMPD_atomic: 6982 case OMPD_flush: 6983 case OMPD_teams: 6984 case OMPD_target_data: 6985 case OMPD_target_exit_data: 6986 case OMPD_target_enter_data: 6987 case OMPD_distribute: 6988 case OMPD_distribute_simd: 6989 case OMPD_distribute_parallel_for: 6990 case OMPD_distribute_parallel_for_simd: 6991 case OMPD_teams_distribute: 6992 case OMPD_teams_distribute_simd: 6993 case OMPD_teams_distribute_parallel_for: 6994 case OMPD_teams_distribute_parallel_for_simd: 6995 case OMPD_target_update: 6996 case OMPD_declare_simd: 6997 case OMPD_declare_variant: 6998 case OMPD_declare_target: 6999 case OMPD_end_declare_target: 7000 case OMPD_declare_reduction: 7001 case OMPD_declare_mapper: 7002 case OMPD_taskloop: 7003 case OMPD_taskloop_simd: 7004 case OMPD_master_taskloop: 7005 case OMPD_master_taskloop_simd: 7006 case OMPD_parallel_master_taskloop: 7007 case OMPD_parallel_master_taskloop_simd: 7008 case OMPD_requires: 7009 case OMPD_unknown: 7010 break; 7011 } 7012 llvm_unreachable("Unsupported directive 
kind."); 7013 } 7014 7015 namespace { 7016 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7017 7018 // Utility to handle information from clauses associated with a given 7019 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7020 // It provides a convenient interface to obtain the information and generate 7021 // code for that information. 7022 class MappableExprsHandler { 7023 public: 7024 /// Values for bit flags used to specify the mapping type for 7025 /// offloading. 7026 enum OpenMPOffloadMappingFlags : uint64_t { 7027 /// No flags 7028 OMP_MAP_NONE = 0x0, 7029 /// Allocate memory on the device and move data from host to device. 7030 OMP_MAP_TO = 0x01, 7031 /// Allocate memory on the device and move data from device to host. 7032 OMP_MAP_FROM = 0x02, 7033 /// Always perform the requested mapping action on the element, even 7034 /// if it was already mapped before. 7035 OMP_MAP_ALWAYS = 0x04, 7036 /// Delete the element from the device environment, ignoring the 7037 /// current reference count associated with the element. 7038 OMP_MAP_DELETE = 0x08, 7039 /// The element being mapped is a pointer-pointee pair; both the 7040 /// pointer and the pointee should be mapped. 7041 OMP_MAP_PTR_AND_OBJ = 0x10, 7042 /// This flags signals that the base address of an entry should be 7043 /// passed to the target kernel as an argument. 7044 OMP_MAP_TARGET_PARAM = 0x20, 7045 /// Signal that the runtime library has to return the device pointer 7046 /// in the current position for the data being mapped. Used when we have the 7047 /// use_device_ptr clause. 7048 OMP_MAP_RETURN_PARAM = 0x40, 7049 /// This flag signals that the reference being passed is a pointer to 7050 /// private data. 7051 OMP_MAP_PRIVATE = 0x80, 7052 /// Pass the element to the device by value. 7053 OMP_MAP_LITERAL = 0x100, 7054 /// Implicit map 7055 OMP_MAP_IMPLICIT = 0x200, 7056 /// Close is a hint to the runtime to allocate memory close to 7057 /// the target device. 7058 OMP_MAP_CLOSE = 0x400, 7059 /// The 16 MSBs of the flags indicate whether the entry is member of some 7060 /// struct/class. 7061 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7062 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7063 }; 7064 7065 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7066 static unsigned getFlagMemberOffset() { 7067 unsigned Offset = 0; 7068 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7069 Remain = Remain >> 1) 7070 Offset++; 7071 return Offset; 7072 } 7073 7074 /// Class that associates information with a base pointer to be passed to the 7075 /// runtime library. 7076 class BasePointerInfo { 7077 /// The base pointer. 7078 llvm::Value *Ptr = nullptr; 7079 /// The base declaration that refers to this device pointer, or null if 7080 /// there is none. 7081 const ValueDecl *DevPtrDecl = nullptr; 7082 7083 public: 7084 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7085 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7086 llvm::Value *operator*() const { return Ptr; } 7087 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7088 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7089 }; 7090 7091 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7092 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7093 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7094 7095 /// Map between a struct and the its lowest & highest elements which have been 7096 /// mapped. 
7097 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7098 /// HE(FieldIndex, Pointer)} 7099 struct StructRangeInfoTy { 7100 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7101 0, Address::invalid()}; 7102 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7103 0, Address::invalid()}; 7104 Address Base = Address::invalid(); 7105 }; 7106 7107 private: 7108 /// Information about a single map clause component list: its components, map type and modifiers, and whether a device pointer has to be returned for it. 7109 struct MapInfo { 7110 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7111 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7112 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7113 bool ReturnDevicePointer = false; 7114 bool IsImplicit = false; 7115 7116 MapInfo() = default; 7117 MapInfo( 7118 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7119 OpenMPMapClauseKind MapType, 7120 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7121 bool ReturnDevicePointer, bool IsImplicit) 7122 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7123 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7124 }; 7125 7126 /// If use_device_ptr is used on a pointer which is a struct member and there 7127 /// is no map information about it, then emission of that entry is deferred 7128 /// until the whole struct has been processed. 7129 struct DeferredDevicePtrEntryTy { 7130 const Expr *IE = nullptr; 7131 const ValueDecl *VD = nullptr; 7132 7133 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7134 : IE(IE), VD(VD) {} 7135 }; 7136 7137 /// The target directive from where the mappable clauses were extracted. It 7138 /// is either an executable directive or a user-defined mapper directive. 7139 llvm::PointerUnion<const OMPExecutableDirective *, 7140 const OMPDeclareMapperDecl *> 7141 CurDir; 7142 7143 /// Function the directive is being generated for. 7144 CodeGenFunction &CGF; 7145 7146 /// Set of all firstprivate variables in the current directive. 7147 /// bool data is set to true if the variable is implicitly marked as 7148 /// firstprivate, false otherwise. 7149 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7150 7151 /// Map between device pointer declarations and their expression components. 7152 /// The key value for declarations in 'this' is null. 7153 llvm::DenseMap< 7154 const ValueDecl *, 7155 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7156 DevPointersMap; 7157 7158 llvm::Value *getExprTypeSize(const Expr *E) const { 7159 QualType ExprTy = E->getType().getCanonicalType(); 7160 7161 // Reference types are ignored for mapping purposes. 7162 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7163 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7164 7165 // Given that an array section is considered a built-in type, we need to 7166 // do the calculation based on the length of the section instead of relying 7167 // on CGF.getTypeSize(E->getType()). 7168 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7169 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7170 OAE->getBase()->IgnoreParenImpCasts()) 7171 .getCanonicalType(); 7172 7173 // If there is no length associated with the expression and the lower 7174 // bound is not specified either, that means we are using the whole 7175 // length of the base.
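// E.g., for 'int arr[10];' the section 'arr[:]' specifies neither a
// length nor a lower bound, so its size is the size of the whole base,
// i.e. 10 * sizeof(int).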
7176 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7177 !OAE->getLowerBound()) 7178 return CGF.getTypeSize(BaseTy); 7179 7180 llvm::Value *ElemSize; 7181 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7182 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7183 } else { 7184 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7185 assert(ATy && "Expecting array type if not a pointer type."); 7186 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7187 } 7188 7189 // If we don't have a length at this point, that is because we have an 7190 // array section with a single element. 7191 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7192 return ElemSize; 7193 7194 if (const Expr *LenExpr = OAE->getLength()) { 7195 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7196 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7197 CGF.getContext().getSizeType(), 7198 LenExpr->getExprLoc()); 7199 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7200 } 7201 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7202 OAE->getLowerBound() && "expected array_section[lb:]."); 7203 // Size = sizeof(base) - lb * sizeof(element), clamped to zero below. 7204 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7205 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7206 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7207 CGF.getContext().getSizeType(), 7208 OAE->getLowerBound()->getExprLoc()); 7209 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7210 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7211 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7212 LengthVal = CGF.Builder.CreateSelect( 7213 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7214 return LengthVal; 7215 } 7216 return CGF.getTypeSize(ExprTy); 7217 } 7218 7219 /// Return the corresponding bits for a given map clause modifier. Add 7220 /// a flag marking the map as a pointer if requested. Add a flag marking the 7221 /// map as the first one of a series of maps that relate to the same map 7222 /// expression. 7223 OpenMPOffloadMappingFlags getMapTypeBits( 7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7225 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7226 OpenMPOffloadMappingFlags Bits = 7227 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7228 switch (MapType) { 7229 case OMPC_MAP_alloc: 7230 case OMPC_MAP_release: 7231 // alloc and release are the default behavior in the runtime library, i.e. 7232 // if we don't pass any bits, alloc/release is what the runtime is going 7233 // to do. Therefore, we don't need to signal anything for these two type 7234 // modifiers.
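// E.g., 'map(alloc: x)' contributes no TO/FROM/DELETE bits at all; the
// only bits such an entry may carry are the ones added below
// (PTR_AND_OBJ, TARGET_PARAM, ALWAYS, CLOSE) plus IMPLICIT for implicit
// maps.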
7235 break; 7236 case OMPC_MAP_to: 7237 Bits |= OMP_MAP_TO; 7238 break; 7239 case OMPC_MAP_from: 7240 Bits |= OMP_MAP_FROM; 7241 break; 7242 case OMPC_MAP_tofrom: 7243 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7244 break; 7245 case OMPC_MAP_delete: 7246 Bits |= OMP_MAP_DELETE; 7247 break; 7248 case OMPC_MAP_unknown: 7249 llvm_unreachable("Unexpected map type!"); 7250 } 7251 if (AddPtrFlag) 7252 Bits |= OMP_MAP_PTR_AND_OBJ; 7253 if (AddIsTargetParamFlag) 7254 Bits |= OMP_MAP_TARGET_PARAM; 7255 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7256 != MapModifiers.end()) 7257 Bits |= OMP_MAP_ALWAYS; 7258 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7259 != MapModifiers.end()) 7260 Bits |= OMP_MAP_CLOSE; 7261 return Bits; 7262 } 7263 7264 /// Return true if the provided expression is a final array section. A 7265 /// final array section is one whose length can't be proved to be one. 7266 bool isFinalArraySectionExpression(const Expr *E) const { 7267 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7268 7269 // It is not an array section and therefore not a unity-size one. 7270 if (!OASE) 7271 return false; 7272 7273 // An array section with no colon always refers to a single element. 7274 if (OASE->getColonLoc().isInvalid()) 7275 return false; 7276 7277 const Expr *Length = OASE->getLength(); 7278 7279 // If we don't have a length we have to check if the array has size 1 7280 // for this dimension. Also, we should always expect a length if the 7281 // base type is a pointer. 7282 if (!Length) { 7283 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7284 OASE->getBase()->IgnoreParenImpCasts()) 7285 .getCanonicalType(); 7286 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7287 return ATy->getSize().getSExtValue() != 1; 7288 // If we don't have a constant dimension length, we have to consider 7289 // the current section as having an arbitrary size, so it is not necessarily 7290 // unitary. If it happens to be unity size, that's the user's fault. 7291 return true; 7292 } 7293 7294 // Check if the length evaluates to 1. 7295 Expr::EvalResult Result; 7296 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7297 return true; // Can have more than size 1. 7298 7299 llvm::APSInt ConstLength = Result.Val.getInt(); 7300 return ConstLength.getSExtValue() != 1; 7301 } 7302 7303 /// Generate the base pointers, section pointers, sizes and map type 7304 /// bits for the provided map type, map modifier, and expression components. 7305 /// \a IsFirstComponentList should be set to true if the provided set of 7306 /// components is the first associated with a capture. 7307 void generateInfoForComponentList( 7308 OpenMPMapClauseKind MapType, 7309 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7310 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7311 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7312 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7313 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7314 bool IsImplicit, 7315 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7316 OverlappedElements = llvm::None) const { 7317 // The following summarizes what has to be generated for each map and the 7318 // types below. The generated information is expressed in this order: 7319 // base pointer, section pointer, size, flags 7320 // (to add to the ones that come from the map type and modifier).
7321 // 7322 // double d; 7323 // int i[100]; 7324 // float *p; 7325 // 7326 // struct S1 { 7327 // int i; 7328 // float f[50]; 7329 // } 7330 // struct S2 { 7331 // int i; 7332 // float f[50]; 7333 // S1 s; 7334 // double *p; 7335 // struct S2 *ps; 7336 // } 7337 // S2 s; 7338 // S2 *ps; 7339 // 7340 // map(d) 7341 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7342 // 7343 // map(i) 7344 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7345 // 7346 // map(i[1:23]) 7347 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7348 // 7349 // map(p) 7350 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7351 // 7352 // map(p[1:24]) 7353 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7354 // 7355 // map(s) 7356 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7357 // 7358 // map(s.i) 7359 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7360 // 7361 // map(s.s.f) 7362 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7363 // 7364 // map(s.p) 7365 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7366 // 7367 // map(to: s.p[:22]) 7368 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7369 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7370 // &(s.p), &(s.p[0]), 22*sizeof(double), 7371 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7372 // (*) alloc space for struct members, only this is a target parameter 7373 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7374 // optimizes this entry out, same in the examples below) 7375 // (***) map the pointee (map: to) 7376 // 7377 // map(s.ps) 7378 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7379 // 7380 // map(from: s.ps->s.i) 7381 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7382 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7383 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7384 // 7385 // map(to: s.ps->ps) 7386 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7387 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7388 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7389 // 7390 // map(s.ps->ps->ps) 7391 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7392 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7393 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7394 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7395 // 7396 // map(to: s.ps->ps->s.f[:22]) 7397 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7398 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7399 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7400 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7401 // 7402 // map(ps) 7403 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7404 // 7405 // map(ps->i) 7406 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7407 // 7408 // map(ps->s.f) 7409 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7410 // 7411 // map(from: ps->p) 7412 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7413 // 7414 // map(to: ps->p[:22]) 7415 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7416 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7417 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7418 // 7419 // map(ps->ps) 7420 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7421 // 7422 // map(from: ps->ps->s.i) 7423 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7424 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7425 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7426 // 7427 // map(from: ps->ps->ps) 7428 // ps, 
&(ps->ps), sizeof(S2*), TARGET_PARAM 7429 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7430 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7431 // 7432 // map(ps->ps->ps->ps) 7433 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7434 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7435 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7436 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7437 // 7438 // map(to: ps->ps->ps->s.f[:22]) 7439 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7440 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7441 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7442 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7443 // 7444 // map(to: s.f[:22]) map(from: s.p[:33]) 7445 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7446 // sizeof(double*) (*), TARGET_PARAM 7447 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7448 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7449 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7450 // (*) allocate contiguous space needed to fit all mapped members even if 7451 // we allocate space for members not mapped (in this example, 7452 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7453 // them as well because they fall between &s.f[0] and &s.p) 7454 // 7455 // map(from: s.f[:22]) map(to: ps->p[:33]) 7456 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7457 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7458 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7459 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7460 // (*) the struct this entry pertains to is the 2nd element in the list of 7461 // arguments, hence MEMBER_OF(2) 7462 // 7463 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7464 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7465 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7466 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7467 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7468 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7469 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7470 // (*) the struct this entry pertains to is the 4th element in the list 7471 // of arguments, hence MEMBER_OF(4) 7472 7473 // Track if the map information being generated is the first for a capture. 7474 bool IsCaptureFirstInfo = IsFirstComponentList; 7475 // When the variable is on a declare target link or in a to clause with 7476 // unified memory, a reference is needed to hold the host/device address 7477 // of the variable. 7478 bool RequiresReference = false; 7479 7480 // Scan the components from the base to the complete expression. 7481 auto CI = Components.rbegin(); 7482 auto CE = Components.rend(); 7483 auto I = CI; 7484 7485 // Track if the map information being generated is the first for a list of 7486 // components. 7487 bool IsExpressionFirstInfo = true; 7488 Address BP = Address::invalid(); 7489 const Expr *AssocExpr = I->getAssociatedExpression(); 7490 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7491 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7492 7493 if (isa<MemberExpr>(AssocExpr)) { 7494 // The base is the 'this' pointer. The content of the pointer is going 7495 // to be the base of the field being mapped.
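// E.g., for 'map(a)' inside a member function, where 'a' is a field, the
// mapped expression is really 'this->a', so the base address is the
// address stored in 'this'.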
7496 BP = CGF.LoadCXXThisAddress(); 7497 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7498 (OASE && 7499 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7500 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7501 } else { 7502 // The base is the reference to the variable. 7503 // BP = &Var. 7504 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7505 if (const auto *VD = 7506 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7507 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7508 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7509 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7510 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7511 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7512 RequiresReference = true; 7513 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7514 } 7515 } 7516 } 7517 7518 // If the variable is a pointer and is being dereferenced (i.e. is not 7519 // the last component), the base has to be the pointer itself, not its 7520 // reference. References are ignored for mapping purposes. 7521 QualType Ty = 7522 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7523 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7524 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7525 7526 // We do not need to generate individual map information for the 7527 // pointer; it can be associated with the combined storage. 7528 ++I; 7529 } 7530 } 7531 7532 // Track whether a component of the list should be marked as MEMBER_OF some 7533 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7534 // in a component list should be marked as MEMBER_OF; all subsequent entries 7535 // do not belong to the base struct. E.g. 7536 // struct S2 s; 7537 // s.ps->ps->ps->f[:] 7538 // (1) (2) (3) (4) 7539 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7540 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7541 // is the pointee of ps(2) which is not a member of struct s, so it should 7542 // not be marked as such (it is still PTR_AND_OBJ). 7543 // The variable is initialized to false so that PTR_AND_OBJ entries which 7544 // are not struct members are not considered (e.g. array of pointers to 7545 // data). 7546 bool ShouldBeMemberOf = false; 7547 7548 // Variable keeping track of whether or not we have encountered a component 7549 // in the component list which is a member expression. Useful when we have a 7550 // pointer or a final array section, in which case it is the previous 7551 // component in the list which tells us whether we have a member expression. 7552 // E.g. X.f[:] 7553 // While processing the final array section "[:]" it is "f" which tells us 7554 // whether we are dealing with a member of a declared struct. 7555 const MemberExpr *EncounteredME = nullptr; 7556 7557 for (; I != CE; ++I) { 7558 // If the current component is a member of a struct (its parent struct), mark it. 7559 if (!EncounteredME) { 7560 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7561 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7562 // as MEMBER_OF the parent struct. 7563 if (EncounteredME) 7564 ShouldBeMemberOf = true; 7565 } 7566 7567 auto Next = std::next(I); 7568 7569 // We need to generate the addresses and sizes if this is the last 7570 // component, if the component is a pointer or if it is an array section 7571 // whose length can't be proved to be one.
If this is a pointer, it 7572 // becomes the base address for the following components. 7573 7574 // A final array section is one whose length can't be proved to be one. 7575 bool IsFinalArraySection = 7576 isFinalArraySectionExpression(I->getAssociatedExpression()); 7577 7578 // Get information on whether the element is a pointer. We have to treat 7579 // array sections specially, given that they are built-in 7580 // types. 7581 const auto *OASE = 7582 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7583 bool IsPointer = 7584 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7585 .getCanonicalType() 7586 ->isAnyPointerType()) || 7587 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7588 7589 if (Next == CE || IsPointer || IsFinalArraySection) { 7590 // If this is not the last component, we expect the pointer to be 7591 // associated with an array expression or member expression. 7592 assert((Next == CE || 7593 isa<MemberExpr>(Next->getAssociatedExpression()) || 7594 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7595 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7596 "Unexpected expression"); 7597 7598 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7599 .getAddress(CGF); 7600 7601 // If this component is a pointer inside the base struct then we don't 7602 // need to create any entry for it - it will be combined with the object 7603 // it is pointing to into a single PTR_AND_OBJ entry. 7604 bool IsMemberPointer = 7605 IsPointer && EncounteredME && 7606 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7607 EncounteredME); 7608 if (!OverlappedElements.empty()) { 7609 // Handle the base element with the info for the overlapped elements. 7610 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7611 assert(Next == CE && 7612 "Expected last element for the overlapped elements."); 7613 assert(!IsPointer && 7614 "Unexpected base element with the pointer type."); 7615 // Mark the whole struct as the struct that requires allocation on the 7616 // device. 7617 PartialStruct.LowestElem = {0, LB}; 7618 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7619 I->getAssociatedExpression()->getType()); 7620 Address HB = CGF.Builder.CreateConstGEP( 7621 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7622 CGF.VoidPtrTy), 7623 TypeSize.getQuantity() - 1); 7624 PartialStruct.HighestElem = { 7625 std::numeric_limits<decltype( 7626 PartialStruct.HighestElem.first)>::max(), 7627 HB}; 7628 PartialStruct.Base = BP; 7629 // Emit entries for the non-overlapped data. 7630 OpenMPOffloadMappingFlags Flags = 7631 OMP_MAP_MEMBER_OF | 7632 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7633 /*AddPtrFlag=*/false, 7634 /*AddIsTargetParamFlag=*/false); 7635 LB = BP; 7636 llvm::Value *Size = nullptr; 7637 // Do a bitcopy of all non-overlapped structure elements.
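// E.g., with 'map(to: s) map(from: s.p[0:10])' the member 's.p' has its
// own map and overlaps the map of 's', so the loop below emits one 'to'
// entry per contiguous region of 's' around 's.p': [&s, &s.p) and
// [one element past s.p, past-the-end of s). The overlapped pointer
// bytes are skipped and handled by their own entries.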
7638 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7639 Component : OverlappedElements) { 7640 Address ComponentLB = Address::invalid(); 7641 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7642 Component) { 7643 if (MC.getAssociatedDeclaration()) { 7644 ComponentLB = 7645 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7646 .getAddress(CGF); 7647 Size = CGF.Builder.CreatePtrDiff( 7648 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7649 CGF.EmitCastToVoidPtr(LB.getPointer())); 7650 break; 7651 } 7652 } 7653 BasePointers.push_back(BP.getPointer()); 7654 Pointers.push_back(LB.getPointer()); 7655 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7656 /*isSigned=*/true)); 7657 Types.push_back(Flags); 7658 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7659 } 7660 BasePointers.push_back(BP.getPointer()); 7661 Pointers.push_back(LB.getPointer()); 7662 Size = CGF.Builder.CreatePtrDiff( 7663 CGF.EmitCastToVoidPtr( 7664 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7665 CGF.EmitCastToVoidPtr(LB.getPointer())); 7666 Sizes.push_back( 7667 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7668 Types.push_back(Flags); 7669 break; 7670 } 7671 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7672 if (!IsMemberPointer) { 7673 BasePointers.push_back(BP.getPointer()); 7674 Pointers.push_back(LB.getPointer()); 7675 Sizes.push_back( 7676 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7677 7678 // We need to add a pointer flag for each map that comes from the 7679 // same expression except for the first one. We also need to signal 7680 // this map is the first one that relates with the current capture 7681 // (there is a set of entries for each capture). 7682 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7683 MapType, MapModifiers, IsImplicit, 7684 !IsExpressionFirstInfo || RequiresReference, 7685 IsCaptureFirstInfo && !RequiresReference); 7686 7687 if (!IsExpressionFirstInfo) { 7688 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7689 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7690 if (IsPointer) 7691 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7692 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7693 7694 if (ShouldBeMemberOf) { 7695 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7696 // should be later updated with the correct value of MEMBER_OF. 7697 Flags |= OMP_MAP_MEMBER_OF; 7698 // From now on, all subsequent PTR_AND_OBJ entries should not be 7699 // marked as MEMBER_OF. 7700 ShouldBeMemberOf = false; 7701 } 7702 } 7703 7704 Types.push_back(Flags); 7705 } 7706 7707 // If we have encountered a member expression so far, keep track of the 7708 // mapped member. If the parent is "*this", then the value declaration 7709 // is nullptr. 7710 if (EncounteredME) { 7711 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7712 unsigned FieldIndex = FD->getFieldIndex(); 7713 7714 // Update info about the lowest and highest elements for this struct 7715 if (!PartialStruct.Base.isValid()) { 7716 PartialStruct.LowestElem = {FieldIndex, LB}; 7717 PartialStruct.HighestElem = {FieldIndex, LB}; 7718 PartialStruct.Base = BP; 7719 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7720 PartialStruct.LowestElem = {FieldIndex, LB}; 7721 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7722 PartialStruct.HighestElem = {FieldIndex, LB}; 7723 } 7724 } 7725 7726 // If we have a final array section, we are done with this expression. 
7727 if (IsFinalArraySection) 7728 break; 7729 7730 // The pointer becomes the base for the next element. 7731 if (Next != CE) 7732 BP = LB; 7733 7734 IsExpressionFirstInfo = false; 7735 IsCaptureFirstInfo = false; 7736 } 7737 } 7738 } 7739 7740 /// Return the adjusted map modifiers if the declaration a capture refers to 7741 /// appears in a firstprivate clause. This is expected to be used only with 7742 /// directives that start with 'target'. 7743 MappableExprsHandler::OpenMPOffloadMappingFlags 7744 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7745 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7746 7747 // A firstprivate variable captured by reference will use only the 7748 // 'private ptr' and 'map to' flags. Return the right flags if the captured 7749 // declaration is known as firstprivate in this handler. 7750 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7751 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7752 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7753 return MappableExprsHandler::OMP_MAP_ALWAYS | 7754 MappableExprsHandler::OMP_MAP_TO; 7755 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7756 return MappableExprsHandler::OMP_MAP_TO | 7757 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7758 return MappableExprsHandler::OMP_MAP_PRIVATE | 7759 MappableExprsHandler::OMP_MAP_TO; 7760 } 7761 return MappableExprsHandler::OMP_MAP_TO | 7762 MappableExprsHandler::OMP_MAP_FROM; 7763 } 7764 7765 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7766 // Shift left by getFlagMemberOffset() bits. 7767 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7768 << getFlagMemberOffset()); 7769 } 7770 7771 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7772 OpenMPOffloadMappingFlags MemberOfFlag) { 7773 // If the entry is PTR_AND_OBJ but has not been marked with the special 7774 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7775 // marked as MEMBER_OF. 7776 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7777 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7778 return; 7779 7780 // Reset the placeholder value to prepare the flag for the assignment of the 7781 // proper MEMBER_OF value. 7782 Flags &= ~OMP_MAP_MEMBER_OF; 7783 Flags |= MemberOfFlag; 7784 } 7785 7786 void getPlainLayout(const CXXRecordDecl *RD, 7787 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7788 bool AsBase) const { 7789 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7790 7791 llvm::StructType *St = 7792 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7793 7794 unsigned NumElements = St->getNumElements(); 7795 llvm::SmallVector< 7796 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7797 RecordLayout(NumElements); 7798 7799 // Fill bases. 7800 for (const auto &I : RD->bases()) { 7801 if (I.isVirtual()) 7802 continue; 7803 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7804 // Ignore empty bases. 7805 if (Base->isEmpty() || CGF.getContext() 7806 .getASTRecordLayout(Base) 7807 .getNonVirtualSize() 7808 .isZero()) 7809 continue; 7810 7811 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7812 RecordLayout[FieldIndex] = Base; 7813 } 7814 // Fill in virtual bases. 7815 for (const auto &I : RD->vbases()) { 7816 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7817 // Ignore empty bases.
7818 if (Base->isEmpty()) 7819 continue; 7820 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7821 if (RecordLayout[FieldIndex]) 7822 continue; 7823 RecordLayout[FieldIndex] = Base; 7824 } 7825 // Fill in all the fields. 7826 assert(!RD->isUnion() && "Unexpected union."); 7827 for (const auto *Field : RD->fields()) { 7828 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7829 // will fill in later.) 7830 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7831 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7832 RecordLayout[FieldIndex] = Field; 7833 } 7834 } 7835 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7836 &Data : RecordLayout) { 7837 if (Data.isNull()) 7838 continue; 7839 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7840 getPlainLayout(Base, Layout, /*AsBase=*/true); 7841 else 7842 Layout.push_back(Data.get<const FieldDecl *>()); 7843 } 7844 } 7845 7846 public: 7847 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7848 : CurDir(&Dir), CGF(CGF) { 7849 // Extract firstprivate clause information. 7850 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7851 for (const auto *D : C->varlists()) 7852 FirstPrivateDecls.try_emplace( 7853 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7854 // Extract device pointer clause information. 7855 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7856 for (auto L : C->component_lists()) 7857 DevPointersMap[L.first].push_back(L.second); 7858 } 7859 7860 /// Constructor for the declare mapper directive. 7861 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7862 : CurDir(&Dir), CGF(CGF) {} 7863 7864 /// Generate code for the combined entry if we have a partially mapped struct 7865 /// and take care of the mapping flags of the arguments corresponding to 7866 /// individual struct members. 7867 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7868 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7869 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7870 const StructRangeInfoTy &PartialStruct) const { 7871 // Base is the base of the struct 7872 BasePointers.push_back(PartialStruct.Base.getPointer()); 7873 // Pointer is the address of the lowest element 7874 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7875 Pointers.push_back(LB); 7876 // Size is (addr of {highest+1} element) - (addr of lowest element) 7877 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7878 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7879 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7880 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7881 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7882 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7883 /*isSigned=*/false); 7884 Sizes.push_back(Size); 7885 // Map type is always TARGET_PARAM 7886 Types.push_back(OMP_MAP_TARGET_PARAM); 7887 // Remove TARGET_PARAM flag from the first element 7888 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7889 7890 // All other current entries will be MEMBER_OF the combined entry 7891 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7892 // 0xFFFF in the MEMBER_OF field). 
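// E.g., with the flag layout above getFlagMemberOffset() is 48, so if
// the combined entry was pushed at index 2 (the 3rd argument overall),
// getMemberOfFlag(2) yields MEMBER_OF(3) == 3ULL << 48, which replaces
// the 0xFFFF placeholder in every marked member entry.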
7893 OpenMPOffloadMappingFlags MemberOfFlag = 7894 getMemberOfFlag(BasePointers.size() - 1); 7895 for (auto &M : CurTypes) 7896 setCorrectMemberOfFlag(M, MemberOfFlag); 7897 } 7898 7899 /// Generate all the base pointers, section pointers, sizes and map 7900 /// types for the extracted mappable expressions. Also, for each item that 7901 /// relates with a device pointer, a pair of the relevant declaration and 7902 /// index where it occurs is appended to the device pointers info array. 7903 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7904 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7905 MapFlagsArrayTy &Types) const { 7906 // We have to process the component lists that relate with the same 7907 // declaration in a single chunk so that we can generate the map flags 7908 // correctly. Therefore, we organize all lists in a map. 7909 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7910 7911 // Helper function to fill the information map for the different supported 7912 // clauses. 7913 auto &&InfoGen = [&Info]( 7914 const ValueDecl *D, 7915 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7916 OpenMPMapClauseKind MapType, 7917 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7918 bool ReturnDevicePointer, bool IsImplicit) { 7919 const ValueDecl *VD = 7920 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7921 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7922 IsImplicit); 7923 }; 7924 7925 assert(CurDir.is<const OMPExecutableDirective *>() && 7926 "Expect an executable directive"); 7927 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7928 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7929 for (const auto L : C->component_lists()) { 7930 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7931 /*ReturnDevicePointer=*/false, C->isImplicit()); 7932 } 7933 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7934 for (const auto L : C->component_lists()) { 7935 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7936 /*ReturnDevicePointer=*/false, C->isImplicit()); 7937 } 7938 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7939 for (const auto L : C->component_lists()) { 7940 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7941 /*ReturnDevicePointer=*/false, C->isImplicit()); 7942 } 7943 7944 // Look at the use_device_ptr clause information and mark the existing map 7945 // entries as such. If there is no map information for an entry in the 7946 // use_device_ptr list, we create one with map type 'alloc' and zero size 7947 // section. It is the user's fault if that was not mapped before. If there is 7948 // no map information and the pointer is a struct member, then we defer the 7949 // emission of that entry until the whole struct has been processed. 7950 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7951 DeferredInfo; 7952 7953 for (const auto *C : 7954 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 7955 for (const auto L : C->component_lists()) { 7956 assert(!L.second.empty() && "Not expecting empty list of components!"); 7957 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7958 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7959 const Expr *IE = L.second.back().getAssociatedExpression(); 7960 // If the first component is a member expression, we have to look into 7961 // 'this', which maps to null in the map of map information.
Otherwise 7962 // look directly for the information. 7963 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7964 7965 // We potentially have map information for this declaration already. 7966 // Look for the first set of components that refer to it. 7967 if (It != Info.end()) { 7968 auto CI = std::find_if( 7969 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7970 return MI.Components.back().getAssociatedDeclaration() == VD; 7971 }); 7972 // If we found a map entry, signal that the pointer has to be returned 7973 // and move on to the next declaration. 7974 if (CI != It->second.end()) { 7975 CI->ReturnDevicePointer = true; 7976 continue; 7977 } 7978 } 7979 7980 // We didn't find any match in our map information - generate a zero 7981 // size array section - if the pointer is a struct member we defer this 7982 // action until the whole struct has been processed. 7983 if (isa<MemberExpr>(IE)) { 7984 // Insert the pointer into Info to be processed by 7985 // generateInfoForComponentList. Because it is a member pointer 7986 // without a pointee, no entry will be generated for it, therefore 7987 // we need to generate one after the whole struct has been processed. 7988 // Nonetheless, generateInfoForComponentList must be called to take 7989 // the pointer into account for the calculation of the range of the 7990 // partial struct. 7991 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 7992 /*ReturnDevicePointer=*/false, C->isImplicit()); 7993 DeferredInfo[nullptr].emplace_back(IE, VD); 7994 } else { 7995 llvm::Value *Ptr = 7996 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 7997 BasePointers.emplace_back(Ptr, VD); 7998 Pointers.push_back(Ptr); 7999 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8000 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8001 } 8002 } 8003 } 8004 8005 for (const auto &M : Info) { 8006 // We need to know when we generate information for the first component 8007 // associated with a capture, because the mapping flags depend on it. 8008 bool IsFirstComponentList = true; 8009 8010 // Temporary versions of arrays 8011 MapBaseValuesArrayTy CurBasePointers; 8012 MapValuesArrayTy CurPointers; 8013 MapValuesArrayTy CurSizes; 8014 MapFlagsArrayTy CurTypes; 8015 StructRangeInfoTy PartialStruct; 8016 8017 for (const MapInfo &L : M.second) { 8018 assert(!L.Components.empty() && 8019 "Not expecting declaration with no component lists."); 8020 8021 // Remember the current base pointer index. 8022 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8023 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8024 CurBasePointers, CurPointers, CurSizes, 8025 CurTypes, PartialStruct, 8026 IsFirstComponentList, L.IsImplicit); 8027 8028 // If this entry relates with a device pointer, set the relevant 8029 // declaration and add the 'return pointer' flag. 8030 if (L.ReturnDevicePointer) { 8031 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8032 "Unexpected number of mapped base pointers."); 8033 8034 const ValueDecl *RelevantVD = 8035 L.Components.back().getAssociatedDeclaration(); 8036 assert(RelevantVD && 8037 "No relevant declaration related with device pointer??"); 8038 8039 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8040 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8041 } 8042 IsFirstComponentList = false; 8043 } 8044 8045 // Append any pending zero-length pointers which are struct members and 8046 // used with use_device_ptr. 
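// E.g., for '#pragma omp target data map(s) use_device_ptr(s.p)' with no
// explicit map of 's.p', no entry could be emitted for 's.p' while the
// struct was processed, so it is appended here as a zero-size
// PTR_AND_OBJ | RETURN_PARAM entry carrying the MEMBER_OF placeholder.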
8047 auto CI = DeferredInfo.find(M.first); 8048 if (CI != DeferredInfo.end()) { 8049 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8050 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8051 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8052 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8053 CurBasePointers.emplace_back(BasePtr, L.VD); 8054 CurPointers.push_back(Ptr); 8055 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8056 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8057 // value MEMBER_OF=FFFF so that the entry is later updated with the 8058 // correct value of MEMBER_OF. 8059 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8060 OMP_MAP_MEMBER_OF); 8061 } 8062 } 8063 8064 // If there is an entry in PartialStruct it means we have a struct with 8065 // individual members mapped. Emit an extra combined entry. 8066 if (PartialStruct.Base.isValid()) 8067 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8068 PartialStruct); 8069 8070 // We need to append the results of this capture to what we already have. 8071 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8072 Pointers.append(CurPointers.begin(), CurPointers.end()); 8073 Sizes.append(CurSizes.begin(), CurSizes.end()); 8074 Types.append(CurTypes.begin(), CurTypes.end()); 8075 } 8076 } 8077 8078 /// Generate all the base pointers, section pointers, sizes and map types for 8079 /// the extracted map clauses of user-defined mapper. 8080 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8081 MapValuesArrayTy &Pointers, 8082 MapValuesArrayTy &Sizes, 8083 MapFlagsArrayTy &Types) const { 8084 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8085 "Expect a declare mapper directive"); 8086 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8087 // We have to process the component lists that relate with the same 8088 // declaration in a single chunk so that we can generate the map flags 8089 // correctly. Therefore, we organize all lists in a map. 8090 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8091 8092 // Helper function to fill the information map for the different supported 8093 // clauses. 8094 auto &&InfoGen = [&Info]( 8095 const ValueDecl *D, 8096 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8097 OpenMPMapClauseKind MapType, 8098 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8099 bool ReturnDevicePointer, bool IsImplicit) { 8100 const ValueDecl *VD = 8101 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8102 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8103 IsImplicit); 8104 }; 8105 8106 for (const auto *C : CurMapperDir->clauselists()) { 8107 const auto *MC = cast<OMPMapClause>(C); 8108 for (const auto L : MC->component_lists()) { 8109 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8110 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8111 } 8112 } 8113 8114 for (const auto &M : Info) { 8115 // We need to know when we generate information for the first component 8116 // associated with a capture, because the mapping flags depend on it. 
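// Only the first component list of a given declaration receives the
// TARGET_PARAM flag; the remaining lists for the same declaration only
// contribute data-movement entries.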
8117 bool IsFirstComponentList = true; 8118 8119 // Temporary versions of arrays 8120 MapBaseValuesArrayTy CurBasePointers; 8121 MapValuesArrayTy CurPointers; 8122 MapValuesArrayTy CurSizes; 8123 MapFlagsArrayTy CurTypes; 8124 StructRangeInfoTy PartialStruct; 8125 8126 for (const MapInfo &L : M.second) { 8127 assert(!L.Components.empty() && 8128 "Not expecting declaration with no component lists."); 8129 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8130 CurBasePointers, CurPointers, CurSizes, 8131 CurTypes, PartialStruct, 8132 IsFirstComponentList, L.IsImplicit); 8133 IsFirstComponentList = false; 8134 } 8135 8136 // If there is an entry in PartialStruct it means we have a struct with 8137 // individual members mapped. Emit an extra combined entry. 8138 if (PartialStruct.Base.isValid()) 8139 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8140 PartialStruct); 8141 8142 // We need to append the results of this capture to what we already have. 8143 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8144 Pointers.append(CurPointers.begin(), CurPointers.end()); 8145 Sizes.append(CurSizes.begin(), CurSizes.end()); 8146 Types.append(CurTypes.begin(), CurTypes.end()); 8147 } 8148 } 8149 8150 /// Emit capture info for lambdas for variables captured by reference. 8151 void generateInfoForLambdaCaptures( 8152 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8153 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8154 MapFlagsArrayTy &Types, 8155 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8156 const auto *RD = VD->getType() 8157 .getCanonicalType() 8158 .getNonReferenceType() 8159 ->getAsCXXRecordDecl(); 8160 if (!RD || !RD->isLambda()) 8161 return; 8162 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8163 LValue VDLVal = CGF.MakeAddrLValue( 8164 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8165 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8166 FieldDecl *ThisCapture = nullptr; 8167 RD->getCaptureFields(Captures, ThisCapture); 8168 if (ThisCapture) { 8169 LValue ThisLVal = 8170 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8171 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8172 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8173 VDLVal.getPointer(CGF)); 8174 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8175 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8176 Sizes.push_back( 8177 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8178 CGF.Int64Ty, /*isSigned=*/true)); 8179 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8180 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8181 } 8182 for (const LambdaCapture &LC : RD->captures()) { 8183 if (!LC.capturesVariable()) 8184 continue; 8185 const VarDecl *VD = LC.getCapturedVar(); 8186 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8187 continue; 8188 auto It = Captures.find(VD); 8189 assert(It != Captures.end() && "Found lambda capture without field."); 8190 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8191 if (LC.getCaptureKind() == LCK_ByRef) { 8192 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8193 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8194 VDLVal.getPointer(CGF)); 8195 BasePointers.push_back(VarLVal.getPointer(CGF)); 8196 Pointers.push_back(VarLValVal.getPointer(CGF)); 8197 Sizes.push_back(CGF.Builder.CreateIntCast( 8198 CGF.getTypeSize( 
8199 VD->getType().getCanonicalType().getNonReferenceType()), 8200 CGF.Int64Ty, /*isSigned=*/true)); 8201 } else { 8202 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8203 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8204 VDLVal.getPointer(CGF)); 8205 BasePointers.push_back(VarLVal.getPointer(CGF)); 8206 Pointers.push_back(VarRVal.getScalarVal()); 8207 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8208 } 8209 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8210 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8211 } 8212 } 8213 8214 /// Set correct indices for lambda captures. 8215 void adjustMemberOfForLambdaCaptures( 8216 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8217 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8218 MapFlagsArrayTy &Types) const { 8219 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8220 // Set correct member_of idx for all implicit lambda captures. 8221 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8222 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8223 continue; 8224 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8225 assert(BasePtr && "Unable to find base lambda address."); 8226 int TgtIdx = -1; 8227 for (unsigned J = I; J > 0; --J) { 8228 unsigned Idx = J - 1; 8229 if (Pointers[Idx] != BasePtr) 8230 continue; 8231 TgtIdx = Idx; 8232 break; 8233 } 8234 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8235 // All other current entries will be MEMBER_OF the combined entry 8236 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8237 // 0xFFFF in the MEMBER_OF field). 8238 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8239 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8240 } 8241 } 8242 8243 /// Generate the base pointers, section pointers, sizes and map types 8244 /// associated with a given capture. 8245 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8246 llvm::Value *Arg, 8247 MapBaseValuesArrayTy &BasePointers, 8248 MapValuesArrayTy &Pointers, 8249 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8250 StructRangeInfoTy &PartialStruct) const { 8251 assert(!Cap->capturesVariableArrayType() && 8252 "Not expecting to generate map info for a variable array type!"); 8253 8254 // We need to know when we are generating information for the first component. 8255 const ValueDecl *VD = Cap->capturesThis() 8256 ? nullptr 8257 : Cap->getCapturedVar()->getCanonicalDecl(); 8258 8259 // If this declaration appears in an is_device_ptr clause we just have to 8260 // pass the pointer by value. If it is a reference to a declaration, we just 8261 // pass its value.
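// E.g., for 'is_device_ptr(p)' the captured value is already a device
// address, so it is passed through unmodified as a LITERAL |
// TARGET_PARAM entry of pointer size; no data mapping is performed.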
8262 if (DevPointersMap.count(VD)) { 8263 BasePointers.emplace_back(Arg, VD); 8264 Pointers.push_back(Arg); 8265 Sizes.push_back( 8266 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8267 CGF.Int64Ty, /*isSigned=*/true)); 8268 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8269 return; 8270 } 8271 8272 using MapData = 8273 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8274 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8275 SmallVector<MapData, 4> DeclComponentLists; 8276 assert(CurDir.is<const OMPExecutableDirective *>() && 8277 "Expect an executable directive"); 8278 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8279 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8280 for (const auto L : C->decl_component_lists(VD)) { 8281 assert(L.first == VD && 8282 "We got information for the wrong declaration??"); 8283 assert(!L.second.empty() && 8284 "Not expecting declaration with no component lists."); 8285 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8286 C->getMapTypeModifiers(), 8287 C->isImplicit()); 8288 } 8289 } 8290 8291 // Find overlapping elements (including the offset from the base element). 8292 llvm::SmallDenseMap< 8293 const MapData *, 8294 llvm::SmallVector< 8295 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8296 4> 8297 OverlappedData; 8298 size_t Count = 0; 8299 for (const MapData &L : DeclComponentLists) { 8300 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8301 OpenMPMapClauseKind MapType; 8302 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8303 bool IsImplicit; 8304 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8305 ++Count; 8306 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8307 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8308 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8309 auto CI = Components.rbegin(); 8310 auto CE = Components.rend(); 8311 auto SI = Components1.rbegin(); 8312 auto SE = Components1.rend(); 8313 for (; CI != CE && SI != SE; ++CI, ++SI) { 8314 if (CI->getAssociatedExpression()->getStmtClass() != 8315 SI->getAssociatedExpression()->getStmtClass()) 8316 break; 8317 // Are we dealing with different variables/fields? 8318 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8319 break; 8320 } 8321 // We found an overlap if, for at least one of the lists, we reached the 8322 // end of the components, i.e. one list is a prefix of the other. 8323 if (CI == CE || SI == SE) { 8324 assert((CI != CE || SI != SE) && 8325 "Unexpected full match of the mapping components."); 8326 const MapData &BaseData = CI == CE ? L : L1; 8327 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8328 SI == SE ? Components : Components1; 8329 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8330 OverlappedElements.getSecond().push_back(SubData); 8331 } 8332 } 8333 } 8334 // Sort the overlapped elements for each item.
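// The comparator below walks the common prefix of two component lists
// and orders them by the first position where they differ: a list that
// is a strict prefix of the other sorts first; fields of the same parent
// record compare by field index, and fields of different parents by
// their position in the plain layout computed below.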
8335 llvm::SmallVector<const FieldDecl *, 4> Layout; 8336 if (!OverlappedData.empty()) { 8337 if (const auto *CRD = 8338 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8339 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8340 else { 8341 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8342 Layout.append(RD->field_begin(), RD->field_end()); 8343 } 8344 } 8345 for (auto &Pair : OverlappedData) { 8346 llvm::sort( 8347 Pair.getSecond(), 8348 [&Layout]( 8349 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8350 OMPClauseMappableExprCommon::MappableExprComponentListRef 8351 Second) { 8352 auto CI = First.rbegin(); 8353 auto CE = First.rend(); 8354 auto SI = Second.rbegin(); 8355 auto SE = Second.rend(); 8356 for (; CI != CE && SI != SE; ++CI, ++SI) { 8357 if (CI->getAssociatedExpression()->getStmtClass() != 8358 SI->getAssociatedExpression()->getStmtClass()) 8359 break; 8360 // Are we dealing with different variables/fields? 8361 if (CI->getAssociatedDeclaration() != 8362 SI->getAssociatedDeclaration()) 8363 break; 8364 } 8365 8366 // Lists contain the same elements. 8367 if (CI == CE && SI == SE) 8368 return false; 8369 8370 // A list with fewer elements is less than a list with more elements. 8371 if (CI == CE || SI == SE) 8372 return CI == CE; 8373 8374 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8375 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8376 if (FD1->getParent() == FD2->getParent()) 8377 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8378 const auto It = 8379 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8380 return FD == FD1 || FD == FD2; 8381 }); 8382 return *It == FD1; 8383 }); 8384 } 8385 8386 // The mapping flags depend on what is generated first for a capture, so 8387 // go through all of the elements that have overlapped elements first. 8388 for (const auto &Pair : OverlappedData) { 8389 const MapData &L = *Pair.getFirst(); 8390 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8391 OpenMPMapClauseKind MapType; 8392 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8393 bool IsImplicit; 8394 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8395 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8396 OverlappedComponents = Pair.getSecond(); 8397 bool IsFirstComponentList = true; 8398 generateInfoForComponentList(MapType, MapModifiers, Components, 8399 BasePointers, Pointers, Sizes, Types, 8400 PartialStruct, IsFirstComponentList, 8401 IsImplicit, OverlappedComponents); 8402 } 8403 // Then go through the remaining elements, those without overlapped elements. 8404 bool IsFirstComponentList = OverlappedData.empty(); 8405 for (const MapData &L : DeclComponentLists) { 8406 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8407 OpenMPMapClauseKind MapType; 8408 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8409 bool IsImplicit; 8410 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8411 auto It = OverlappedData.find(&L); 8412 if (It == OverlappedData.end()) 8413 generateInfoForComponentList(MapType, MapModifiers, Components, 8414 BasePointers, Pointers, Sizes, Types, 8415 PartialStruct, IsFirstComponentList, 8416 IsImplicit); 8417 IsFirstComponentList = false; 8418 } 8419 } 8420 8421 /// Generate the base pointers, section pointers, sizes and map types 8422 /// associated with the declare target link variables.
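/// E.g., 'int x;' marked with '#pragma omp declare target link(x)' is not
/// a captured variable, so a 'map(x)' list item is handled here, going
/// through the reference the compiler creates for the link variable.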
8423 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8424 MapValuesArrayTy &Pointers, 8425 MapValuesArrayTy &Sizes, 8426 MapFlagsArrayTy &Types) const { 8427 assert(CurDir.is<const OMPExecutableDirective *>() && 8428 "Expect an executable directive"); 8429 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8430 // Map other list items in the map clause which are not captured variables 8431 // but "declare target link" global variables. 8432 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8433 for (const auto L : C->component_lists()) { 8434 if (!L.first) 8435 continue; 8436 const auto *VD = dyn_cast<VarDecl>(L.first); 8437 if (!VD) 8438 continue; 8439 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8440 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8441 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8442 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8443 continue; 8444 StructRangeInfoTy PartialStruct; 8445 generateInfoForComponentList( 8446 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8447 Pointers, Sizes, Types, PartialStruct, 8448 /*IsFirstComponentList=*/true, C->isImplicit()); 8449 assert(!PartialStruct.Base.isValid() && 8450 "No partial structs for declare target link expected."); 8451 } 8452 } 8453 } 8454 8455 /// Generate the default map information for a given capture \a CI, 8456 /// record field declaration \a RI and captured value \a CV. 8457 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8458 const FieldDecl &RI, llvm::Value *CV, 8459 MapBaseValuesArrayTy &CurBasePointers, 8460 MapValuesArrayTy &CurPointers, 8461 MapValuesArrayTy &CurSizes, 8462 MapFlagsArrayTy &CurMapTypes) const { 8463 bool IsImplicit = true; 8464 // Do the default mapping. 8465 if (CI.capturesThis()) { 8466 CurBasePointers.push_back(CV); 8467 CurPointers.push_back(CV); 8468 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8469 CurSizes.push_back( 8470 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8471 CGF.Int64Ty, /*isSigned=*/true)); 8472 // Default map type. 8473 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8474 } else if (CI.capturesVariableByCopy()) { 8475 CurBasePointers.push_back(CV); 8476 CurPointers.push_back(CV); 8477 if (!RI.getType()->isAnyPointerType()) { 8478 // We have to signal to the runtime those captures that are passed by 8479 // value and are not pointers. 8480 CurMapTypes.push_back(OMP_MAP_LITERAL); 8481 CurSizes.push_back(CGF.Builder.CreateIntCast( 8482 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8483 } else { 8484 // Pointers are implicitly mapped with a zero size and no flags 8485 // (other than first map that is added for all implicit maps). 8486 CurMapTypes.push_back(OMP_MAP_NONE); 8487 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8488 } 8489 const VarDecl *VD = CI.getCapturedVar(); 8490 auto I = FirstPrivateDecls.find(VD); 8491 if (I != FirstPrivateDecls.end()) 8492 IsImplicit = I->getSecond(); 8493 } else { 8494 assert(CI.capturesVariable() && "Expected captured reference."); 8495 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8496 QualType ElementType = PtrTy->getPointeeType(); 8497 CurSizes.push_back(CGF.Builder.CreateIntCast( 8498 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8499 // The default map type for a scalar/complex type is 'to' because by 8500 // default the value doesn't have to be retrieved.
For an aggregate 8501 // type, the default is 'tofrom'. 8502 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8503 const VarDecl *VD = CI.getCapturedVar(); 8504 auto I = FirstPrivateDecls.find(VD); 8505 if (I != FirstPrivateDecls.end() && 8506 VD->getType().isConstant(CGF.getContext())) { 8507 llvm::Constant *Addr = 8508 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8509 // Copy the value of the original variable to the new global copy. 8510 CGF.Builder.CreateMemCpy( 8511 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8512 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8513 CurSizes.back(), /*IsVolatile=*/false); 8514 // Use new global variable as the base pointers. 8515 CurBasePointers.push_back(Addr); 8516 CurPointers.push_back(Addr); 8517 } else { 8518 CurBasePointers.push_back(CV); 8519 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8520 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8521 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8522 AlignmentSource::Decl)); 8523 CurPointers.push_back(PtrAddr.getPointer()); 8524 } else { 8525 CurPointers.push_back(CV); 8526 } 8527 } 8528 if (I != FirstPrivateDecls.end()) 8529 IsImplicit = I->getSecond(); 8530 } 8531 // Every default map produces a single argument which is a target parameter. 8532 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8533 8534 // Add flag stating this is an implicit map. 8535 if (IsImplicit) 8536 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8537 } 8538 }; 8539 } // anonymous namespace 8540 8541 /// Emit the arrays used to pass the captures and map information to the 8542 /// offloading runtime library. If there is no map or capture information, 8543 /// return nullptr by reference. 8544 static void 8545 emitOffloadingArrays(CodeGenFunction &CGF, 8546 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8547 MappableExprsHandler::MapValuesArrayTy &Pointers, 8548 MappableExprsHandler::MapValuesArrayTy &Sizes, 8549 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8550 CGOpenMPRuntime::TargetDataInfo &Info) { 8551 CodeGenModule &CGM = CGF.CGM; 8552 ASTContext &Ctx = CGF.getContext(); 8553 8554 // Reset the array information. 8555 Info.clearArrayInfo(); 8556 Info.NumberOfPtrs = BasePointers.size(); 8557 8558 if (Info.NumberOfPtrs) { 8559 // Detect if we have any capture size requiring runtime evaluation of the 8560 // size so that a constant array could be eventually used. 8561 bool hasRuntimeEvaluationCaptureSize = false; 8562 for (llvm::Value *S : Sizes) 8563 if (!isa<llvm::Constant>(S)) { 8564 hasRuntimeEvaluationCaptureSize = true; 8565 break; 8566 } 8567 8568 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8569 QualType PointerArrayType = Ctx.getConstantArrayType( 8570 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8571 /*IndexTypeQuals=*/0); 8572 8573 Info.BasePointersArray = 8574 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8575 Info.PointersArray = 8576 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8577 8578 // If we don't have any VLA types or other types that require runtime 8579 // evaluation, we can use a constant array for the map sizes, otherwise we 8580 // need to fill up the arrays as we do for the pointers. 
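    // As a sketch (names match the temporaries created below; the concrete
    // values are invented for illustration), two maps with a constant size 4
    // and one runtime-evaluated VLA size give roughly:
    // \code
    //   @.offload_maptypes = private constant [2 x i64] [...]
    //   %.offload_baseptrs = alloca [2 x i8*]
    //   %.offload_ptrs     = alloca [2 x i8*]
    //   %.offload_sizes    = alloca [2 x i64] ; filled at run time below
    // \endcode
    // With only constant sizes, .offload_sizes instead becomes a private
    // constant global like the map types, and no per-element stores into it
    // are emitted.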
8581 QualType Int64Ty = 8582 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8583 if (hasRuntimeEvaluationCaptureSize) { 8584 QualType SizeArrayType = Ctx.getConstantArrayType( 8585 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8586 /*IndexTypeQuals=*/0); 8587 Info.SizesArray = 8588 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8589 } else { 8590 // We expect all the sizes to be constant, so we collect them to create 8591 // a constant array. 8592 SmallVector<llvm::Constant *, 16> ConstSizes; 8593 for (llvm::Value *S : Sizes) 8594 ConstSizes.push_back(cast<llvm::Constant>(S)); 8595 8596 auto *SizesArrayInit = llvm::ConstantArray::get( 8597 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8598 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8599 auto *SizesArrayGbl = new llvm::GlobalVariable( 8600 CGM.getModule(), SizesArrayInit->getType(), 8601 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8602 SizesArrayInit, Name); 8603 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8604 Info.SizesArray = SizesArrayGbl; 8605 } 8606 8607 // The map types are always constant so we don't need to generate code to 8608 // fill arrays. Instead, we create an array constant. 8609 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8610 llvm::copy(MapTypes, Mapping.begin()); 8611 llvm::Constant *MapTypesArrayInit = 8612 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8613 std::string MaptypesName = 8614 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8615 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8616 CGM.getModule(), MapTypesArrayInit->getType(), 8617 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8618 MapTypesArrayInit, MaptypesName); 8619 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8620 Info.MapTypesArray = MapTypesArrayGbl; 8621 8622 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8623 llvm::Value *BPVal = *BasePointers[I]; 8624 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8625 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8626 Info.BasePointersArray, 0, I); 8627 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8628 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8629 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8630 CGF.Builder.CreateStore(BPVal, BPAddr); 8631 8632 if (Info.requiresDevicePointerInfo()) 8633 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8634 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8635 8636 llvm::Value *PVal = Pointers[I]; 8637 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8638 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8639 Info.PointersArray, 0, I); 8640 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8641 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8642 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8643 CGF.Builder.CreateStore(PVal, PAddr); 8644 8645 if (hasRuntimeEvaluationCaptureSize) { 8646 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8647 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8648 Info.SizesArray, 8649 /*Idx0=*/0, 8650 /*Idx1=*/I); 8651 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8652 CGF.Builder.CreateStore( 8653 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8654 SAddr); 8655 } 8656 } 8657 } 8658 } 8659 8660 /// Emit the arguments to be passed to the runtime library based on the 8661 /// arrays of pointers, sizes and 
map types. 8662 static void emitOffloadingArraysArgument( 8663 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8664 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8665 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8666 CodeGenModule &CGM = CGF.CGM; 8667 if (Info.NumberOfPtrs) { 8668 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8669 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8670 Info.BasePointersArray, 8671 /*Idx0=*/0, /*Idx1=*/0); 8672 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8673 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8674 Info.PointersArray, 8675 /*Idx0=*/0, 8676 /*Idx1=*/0); 8677 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8678 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8679 /*Idx0=*/0, /*Idx1=*/0); 8680 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8681 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8682 Info.MapTypesArray, 8683 /*Idx0=*/0, 8684 /*Idx1=*/0); 8685 } else { 8686 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8687 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8688 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8689 MapTypesArrayArg = 8690 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8691 } 8692 } 8693 8694 /// Check for inner distribute directive. 8695 static const OMPExecutableDirective * 8696 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8697 const auto *CS = D.getInnermostCapturedStmt(); 8698 const auto *Body = 8699 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8700 const Stmt *ChildStmt = 8701 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8702 8703 if (const auto *NestedDir = 8704 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8705 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8706 switch (D.getDirectiveKind()) { 8707 case OMPD_target: 8708 if (isOpenMPDistributeDirective(DKind)) 8709 return NestedDir; 8710 if (DKind == OMPD_teams) { 8711 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8712 /*IgnoreCaptured=*/true); 8713 if (!Body) 8714 return nullptr; 8715 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8716 if (const auto *NND = 8717 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8718 DKind = NND->getDirectiveKind(); 8719 if (isOpenMPDistributeDirective(DKind)) 8720 return NND; 8721 } 8722 } 8723 return nullptr; 8724 case OMPD_target_teams: 8725 if (isOpenMPDistributeDirective(DKind)) 8726 return NestedDir; 8727 return nullptr; 8728 case OMPD_target_parallel: 8729 case OMPD_target_simd: 8730 case OMPD_target_parallel_for: 8731 case OMPD_target_parallel_for_simd: 8732 return nullptr; 8733 case OMPD_target_teams_distribute: 8734 case OMPD_target_teams_distribute_simd: 8735 case OMPD_target_teams_distribute_parallel_for: 8736 case OMPD_target_teams_distribute_parallel_for_simd: 8737 case OMPD_parallel: 8738 case OMPD_for: 8739 case OMPD_parallel_for: 8740 case OMPD_parallel_master: 8741 case OMPD_parallel_sections: 8742 case OMPD_for_simd: 8743 case OMPD_parallel_for_simd: 8744 case OMPD_cancel: 8745 case OMPD_cancellation_point: 8746 case OMPD_ordered: 8747 case OMPD_threadprivate: 8748 case OMPD_allocate: 8749 case OMPD_task: 8750 case OMPD_simd: 8751 case OMPD_sections: 8752 case OMPD_section: 8753 case OMPD_single: 8754 case OMPD_master: 8755 case OMPD_critical: 8756 
case OMPD_taskyield: 8757 case OMPD_barrier: 8758 case OMPD_taskwait: 8759 case OMPD_taskgroup: 8760 case OMPD_atomic: 8761 case OMPD_flush: 8762 case OMPD_teams: 8763 case OMPD_target_data: 8764 case OMPD_target_exit_data: 8765 case OMPD_target_enter_data: 8766 case OMPD_distribute: 8767 case OMPD_distribute_simd: 8768 case OMPD_distribute_parallel_for: 8769 case OMPD_distribute_parallel_for_simd: 8770 case OMPD_teams_distribute: 8771 case OMPD_teams_distribute_simd: 8772 case OMPD_teams_distribute_parallel_for: 8773 case OMPD_teams_distribute_parallel_for_simd: 8774 case OMPD_target_update: 8775 case OMPD_declare_simd: 8776 case OMPD_declare_variant: 8777 case OMPD_declare_target: 8778 case OMPD_end_declare_target: 8779 case OMPD_declare_reduction: 8780 case OMPD_declare_mapper: 8781 case OMPD_taskloop: 8782 case OMPD_taskloop_simd: 8783 case OMPD_master_taskloop: 8784 case OMPD_master_taskloop_simd: 8785 case OMPD_parallel_master_taskloop: 8786 case OMPD_parallel_master_taskloop_simd: 8787 case OMPD_requires: 8788 case OMPD_unknown: 8789 llvm_unreachable("Unexpected directive."); 8790 } 8791 } 8792 8793 return nullptr; 8794 } 8795 8796 /// Emit the user-defined mapper function. The code generation follows the 8797 /// pattern in the example below. 8798 /// \code 8799 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8800 /// void *base, void *begin, 8801 /// int64_t size, int64_t type) { 8802 /// // Allocate space for an array section first. 8803 /// if (size > 1 && !maptype.IsDelete) 8804 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8805 /// size*sizeof(Ty), clearToFrom(type)); 8806 /// // Map members. 8807 /// for (unsigned i = 0; i < size; i++) { 8808 /// // For each component specified by this mapper: 8809 /// for (auto c : all_components) { 8810 /// if (c.hasMapper()) 8811 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8812 /// c.arg_type); 8813 /// else 8814 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8815 /// c.arg_begin, c.arg_size, c.arg_type); 8816 /// } 8817 /// } 8818 /// // Delete the array section. 8819 /// if (size > 1 && maptype.IsDelete) 8820 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8821 /// size*sizeof(Ty), clearToFrom(type)); 8822 /// } 8823 /// \endcode 8824 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8825 CodeGenFunction *CGF) { 8826 if (UDMMap.count(D) > 0) 8827 return; 8828 ASTContext &C = CGM.getContext(); 8829 QualType Ty = D->getType(); 8830 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8831 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8832 auto *MapperVarDecl = 8833 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8834 SourceLocation Loc = D->getLocation(); 8835 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8836 8837 // Prepare mapper function arguments and attributes. 
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through the \p Size elements and map all of
  // them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
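  // In pseudo C, the loop emitted below is roughly (a sketch; block names
  // match the createBasicBlock calls above):
  // \code
  //   head: if (PtrBegin == PtrEnd) goto done;
  //   body: Ptr = phi(PtrBegin, PtrNext); <map element *Ptr>;
  //         PtrNext = Ptr + 1;
  //         if (PtrNext == PtrEnd) goto exit; else goto body;
  // \endcode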
8903 llvm::Value *IsEmpty = 8904 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8905 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8906 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8907 8908 // Emit the loop body block. 8909 MapperCGF.EmitBlock(BodyBB); 8910 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8911 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8912 PtrPHI->addIncoming(PtrBegin, EntryBB); 8913 Address PtrCurrent = 8914 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8915 .getAlignment() 8916 .alignmentOfArrayElement(ElementSize)); 8917 // Privatize the declared variable of mapper to be the current array element. 8918 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8919 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8920 return MapperCGF 8921 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8922 .getAddress(MapperCGF); 8923 }); 8924 (void)Scope.Privatize(); 8925 8926 // Get map clause information. Fill up the arrays with all mapped variables. 8927 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8928 MappableExprsHandler::MapValuesArrayTy Pointers; 8929 MappableExprsHandler::MapValuesArrayTy Sizes; 8930 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8931 MappableExprsHandler MEHandler(*D, MapperCGF); 8932 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8933 8934 // Call the runtime API __tgt_mapper_num_components to get the number of 8935 // pre-existing components. 8936 llvm::Value *OffloadingArgs[] = {Handle}; 8937 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8938 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8939 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8940 PreviousSize, 8941 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8942 8943 // Fill up the runtime mapper handle for all components. 8944 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8945 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8946 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8947 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8948 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8949 llvm::Value *CurSizeArg = Sizes[I]; 8950 8951 // Extract the MEMBER_OF field from the map type. 8952 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8953 MapperCGF.EmitBlock(MemberBB); 8954 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8955 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8956 OriMapType, 8957 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8958 llvm::BasicBlock *MemberCombineBB = 8959 MapperCGF.createBasicBlock("omp.member.combine"); 8960 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8961 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8962 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8963 // Add the number of pre-existing components to the MEMBER_OF field if it 8964 // is valid. 8965 MapperCGF.EmitBlock(MemberCombineBB); 8966 llvm::Value *CombinedMember = 8967 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8968 // Do nothing if it is not a member of previous components. 
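    // In pseudo C, the MEMBER_OF adjustment above amounts to (a sketch, not
    // literal emitted code):
    // \code
    //   if (MapTypes[I] & OMP_MAP_MEMBER_OF)
    //     membermaptype = MapTypes[I] + (PreviousSize << FlagMemberOffset);
    //   else
    //     membermaptype = MapTypes[I];
    // \endcode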
8969 MapperCGF.EmitBlock(TypeBB); 8970 llvm::PHINode *MemberMapType = 8971 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 8972 MemberMapType->addIncoming(OriMapType, MemberBB); 8973 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 8974 8975 // Combine the map type inherited from user-defined mapper with that 8976 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 8977 // bits of the \a MapType, which is the input argument of the mapper 8978 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 8979 // bits of MemberMapType. 8980 // [OpenMP 5.0], 1.2.6. map-type decay. 8981 // | alloc | to | from | tofrom | release | delete 8982 // ---------------------------------------------------------- 8983 // alloc | alloc | alloc | alloc | alloc | release | delete 8984 // to | alloc | to | alloc | to | release | delete 8985 // from | alloc | alloc | from | from | release | delete 8986 // tofrom | alloc | to | from | tofrom | release | delete 8987 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 8988 MapType, 8989 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 8990 MappableExprsHandler::OMP_MAP_FROM)); 8991 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 8992 llvm::BasicBlock *AllocElseBB = 8993 MapperCGF.createBasicBlock("omp.type.alloc.else"); 8994 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 8995 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 8996 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 8997 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 8998 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 8999 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9000 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9001 MapperCGF.EmitBlock(AllocBB); 9002 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9003 MemberMapType, 9004 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9005 MappableExprsHandler::OMP_MAP_FROM))); 9006 MapperCGF.Builder.CreateBr(EndBB); 9007 MapperCGF.EmitBlock(AllocElseBB); 9008 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9009 LeftToFrom, 9010 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9011 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9012 // In case of to, clear OMP_MAP_FROM. 9013 MapperCGF.EmitBlock(ToBB); 9014 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9015 MemberMapType, 9016 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9017 MapperCGF.Builder.CreateBr(EndBB); 9018 MapperCGF.EmitBlock(ToElseBB); 9019 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9020 LeftToFrom, 9021 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9022 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9023 // In case of from, clear OMP_MAP_TO. 9024 MapperCGF.EmitBlock(FromBB); 9025 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9026 MemberMapType, 9027 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9028 // In case of tofrom, do nothing. 
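    // The PHI emitted below merges those paths; in pseudo C the whole
    // map-type decay is roughly (a sketch):
    // \code
    //   switch (MapType & (OMP_MAP_TO | OMP_MAP_FROM)) {
    //   case 0:                         maptype &= ~(TO | FROM); break;
    //   case OMP_MAP_TO:                maptype &= ~FROM;        break;
    //   case OMP_MAP_FROM:              maptype &= ~TO;          break;
    //   case OMP_MAP_TO | OMP_MAP_FROM: /* tofrom: keep as is */ break;
    //   }
    // \endcode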
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper
    // is associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped
/// and whether the \a MapType instructs to delete this section. If \a IsInit
/// is true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
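  // Sketch of the emitted check, in pseudo C (the element size comes from
  // \p ElementSize):
  // \code
  //   if (Size >= 1 && (IsInit ? !(MapType & OMP_MAP_DELETE)
  //                            : (MapType & OMP_MAP_DELETE)))
  //     __tgt_push_mapper_component(Handle, Base, Begin,
  //                                 Size * sizeof(element),
  //                                 MapType & ~(OMP_MAP_TO | OMP_MAP_FROM));
  // \endcode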
9095 MapperCGF.EmitBlock(IsDeleteBB); 9096 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9097 MapType, 9098 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9099 llvm::Value *DeleteCond; 9100 if (IsInit) { 9101 DeleteCond = MapperCGF.Builder.CreateIsNull( 9102 DeleteBit, "omp.array" + Prefix + ".delete"); 9103 } else { 9104 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9105 DeleteBit, "omp.array" + Prefix + ".delete"); 9106 } 9107 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9108 9109 MapperCGF.EmitBlock(BodyBB); 9110 // Get the array size by multiplying element size and element number (i.e., \p 9111 // Size). 9112 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9113 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9114 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9115 // memory allocation/deletion purpose only. 9116 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9117 MapType, 9118 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9119 MappableExprsHandler::OMP_MAP_FROM))); 9120 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9121 // data structure. 9122 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9123 MapperCGF.EmitRuntimeCall( 9124 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9125 } 9126 9127 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9128 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9129 llvm::Value *DeviceID, 9130 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9131 const OMPLoopDirective &D)> 9132 SizeEmitter) { 9133 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9134 const OMPExecutableDirective *TD = &D; 9135 // Get nested teams distribute kind directive, if any. 9136 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9137 TD = getNestedDistributeDirective(CGM.getContext(), D); 9138 if (!TD) 9139 return; 9140 const auto *LD = cast<OMPLoopDirective>(TD); 9141 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9142 PrePostActionTy &) { 9143 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9144 llvm::Value *Args[] = {DeviceID, NumIterations}; 9145 CGF.EmitRuntimeCall( 9146 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9147 } 9148 }; 9149 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9150 } 9151 9152 void CGOpenMPRuntime::emitTargetCall( 9153 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9154 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9155 const Expr *Device, 9156 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9157 const OMPLoopDirective &D)> 9158 SizeEmitter) { 9159 if (!CGF.HaveInsertPoint()) 9160 return; 9161 9162 assert(OutlinedFn && "Invalid outlined function!"); 9163 9164 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9165 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9166 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9167 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9168 PrePostActionTy &) { 9169 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9170 }; 9171 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9172 9173 CodeGenFunction::OMPTargetDataInfo InputInfo; 9174 llvm::Value *MapTypesArray = nullptr; 9175 // Fill up the pointer arrays and transfer execution to the device. 
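  // What ThenGen below boils down to, in pseudo C (a sketch; the teams and
  // nowait variants select different __tgt_target* entry points):
  // \code
  //   if (__tgt_target_teams(device_id, fn_id, n, baseptrs, ptrs, sizes,
  //                          maptypes, num_teams, num_threads) != 0)
  //     <call the outlined host version with the captured variables>;
  // \endcode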
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that, so that the
    // compiler doesn't need to keep it alive and can therefore inline the
    // host function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads, so no additional calls to the runtime are
    // required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated.
If the user is using teams 9245 // but no clauses, these two values will be the default that should be 9246 // passed to the runtime library - a 32-bit integer with the value zero. 9247 assert(NumThreads && "Thread limit expression should be available along " 9248 "with number of teams."); 9249 llvm::Value *OffloadingArgs[] = {DeviceID, 9250 OutlinedFnID, 9251 PointerNum, 9252 InputInfo.BasePointersArray.getPointer(), 9253 InputInfo.PointersArray.getPointer(), 9254 InputInfo.SizesArray.getPointer(), 9255 MapTypesArray, 9256 NumTeams, 9257 NumThreads}; 9258 Return = CGF.EmitRuntimeCall( 9259 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9260 : OMPRTL__tgt_target_teams), 9261 OffloadingArgs); 9262 } else { 9263 llvm::Value *OffloadingArgs[] = {DeviceID, 9264 OutlinedFnID, 9265 PointerNum, 9266 InputInfo.BasePointersArray.getPointer(), 9267 InputInfo.PointersArray.getPointer(), 9268 InputInfo.SizesArray.getPointer(), 9269 MapTypesArray}; 9270 Return = CGF.EmitRuntimeCall( 9271 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 9272 : OMPRTL__tgt_target), 9273 OffloadingArgs); 9274 } 9275 9276 // Check the error code and execute the host version if required. 9277 llvm::BasicBlock *OffloadFailedBlock = 9278 CGF.createBasicBlock("omp_offload.failed"); 9279 llvm::BasicBlock *OffloadContBlock = 9280 CGF.createBasicBlock("omp_offload.cont"); 9281 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9282 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9283 9284 CGF.EmitBlock(OffloadFailedBlock); 9285 if (RequiresOuterTask) { 9286 CapturedVars.clear(); 9287 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9288 } 9289 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9290 CGF.EmitBranch(OffloadContBlock); 9291 9292 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9293 }; 9294 9295 // Notify that the host version must be executed. 9296 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9297 RequiresOuterTask](CodeGenFunction &CGF, 9298 PrePostActionTy &) { 9299 if (RequiresOuterTask) { 9300 CapturedVars.clear(); 9301 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9302 } 9303 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9304 }; 9305 9306 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9307 &CapturedVars, RequiresOuterTask, 9308 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9309 // Fill up the arrays with all the captured variables. 9310 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9311 MappableExprsHandler::MapValuesArrayTy Pointers; 9312 MappableExprsHandler::MapValuesArrayTy Sizes; 9313 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9314 9315 // Get mappable expression information. 9316 MappableExprsHandler MEHandler(D, CGF); 9317 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9318 9319 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9320 auto CV = CapturedVars.begin(); 9321 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9322 CE = CS.capture_end(); 9323 CI != CE; ++CI, ++RI, ++CV) { 9324 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9325 MappableExprsHandler::MapValuesArrayTy CurPointers; 9326 MappableExprsHandler::MapValuesArrayTy CurSizes; 9327 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9328 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9329 9330 // VLA sizes are passed to the outlined region by copy and do not have map 9331 // information associated. 
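      // For instance (an invented example):
      // \code
      //   void foo(int n) {
      //     int a[n];
      //   #pragma omp target map(tofrom : a)
      //     a[0] = n;
      //   }
      // \endcode
      // Here the VLA size 'n' is captured and sent to the device by copy as
      // a literal argument (OMP_MAP_LITERAL below); it has no user-written
      // map clause of its own.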
9332 if (CI->capturesVariableArrayType()) { 9333 CurBasePointers.push_back(*CV); 9334 CurPointers.push_back(*CV); 9335 CurSizes.push_back(CGF.Builder.CreateIntCast( 9336 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9337 // Copy to the device as an argument. No need to retrieve it. 9338 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9339 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9340 MappableExprsHandler::OMP_MAP_IMPLICIT); 9341 } else { 9342 // If we have any information in the map clause, we use it, otherwise we 9343 // just do a default mapping. 9344 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9345 CurSizes, CurMapTypes, PartialStruct); 9346 if (CurBasePointers.empty()) 9347 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9348 CurPointers, CurSizes, CurMapTypes); 9349 // Generate correct mapping for variables captured by reference in 9350 // lambdas. 9351 if (CI->capturesVariable()) 9352 MEHandler.generateInfoForLambdaCaptures( 9353 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9354 CurMapTypes, LambdaPointers); 9355 } 9356 // We expect to have at least an element of information for this capture. 9357 assert(!CurBasePointers.empty() && 9358 "Non-existing map pointer for capture!"); 9359 assert(CurBasePointers.size() == CurPointers.size() && 9360 CurBasePointers.size() == CurSizes.size() && 9361 CurBasePointers.size() == CurMapTypes.size() && 9362 "Inconsistent map information sizes!"); 9363 9364 // If there is an entry in PartialStruct it means we have a struct with 9365 // individual members mapped. Emit an extra combined entry. 9366 if (PartialStruct.Base.isValid()) 9367 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9368 CurMapTypes, PartialStruct); 9369 9370 // We need to append the results of this capture to what we already have. 9371 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9372 Pointers.append(CurPointers.begin(), CurPointers.end()); 9373 Sizes.append(CurSizes.begin(), CurSizes.end()); 9374 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9375 } 9376 // Adjust MEMBER_OF flags for the lambdas captures. 9377 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9378 Pointers, MapTypes); 9379 // Map other list items in the map clause which are not captured variables 9380 // but "declare target link" global variables. 9381 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9382 MapTypes); 9383 9384 TargetDataInfo Info; 9385 // Fill up the arrays and create the arguments. 
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID, it means that we need to support
  // offloading; otherwise, we just execute on the host. We need to execute
  // on the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so, just signal that we are done with this target region.
9451 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9452 ParentName, Line)) 9453 return; 9454 9455 switch (E.getDirectiveKind()) { 9456 case OMPD_target: 9457 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9458 cast<OMPTargetDirective>(E)); 9459 break; 9460 case OMPD_target_parallel: 9461 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9462 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9463 break; 9464 case OMPD_target_teams: 9465 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9466 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9467 break; 9468 case OMPD_target_teams_distribute: 9469 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9470 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9471 break; 9472 case OMPD_target_teams_distribute_simd: 9473 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9474 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9475 break; 9476 case OMPD_target_parallel_for: 9477 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9478 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9479 break; 9480 case OMPD_target_parallel_for_simd: 9481 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9482 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9483 break; 9484 case OMPD_target_simd: 9485 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9486 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9487 break; 9488 case OMPD_target_teams_distribute_parallel_for: 9489 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9490 CGM, ParentName, 9491 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9492 break; 9493 case OMPD_target_teams_distribute_parallel_for_simd: 9494 CodeGenFunction:: 9495 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9496 CGM, ParentName, 9497 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9498 break; 9499 case OMPD_parallel: 9500 case OMPD_for: 9501 case OMPD_parallel_for: 9502 case OMPD_parallel_master: 9503 case OMPD_parallel_sections: 9504 case OMPD_for_simd: 9505 case OMPD_parallel_for_simd: 9506 case OMPD_cancel: 9507 case OMPD_cancellation_point: 9508 case OMPD_ordered: 9509 case OMPD_threadprivate: 9510 case OMPD_allocate: 9511 case OMPD_task: 9512 case OMPD_simd: 9513 case OMPD_sections: 9514 case OMPD_section: 9515 case OMPD_single: 9516 case OMPD_master: 9517 case OMPD_critical: 9518 case OMPD_taskyield: 9519 case OMPD_barrier: 9520 case OMPD_taskwait: 9521 case OMPD_taskgroup: 9522 case OMPD_atomic: 9523 case OMPD_flush: 9524 case OMPD_teams: 9525 case OMPD_target_data: 9526 case OMPD_target_exit_data: 9527 case OMPD_target_enter_data: 9528 case OMPD_distribute: 9529 case OMPD_distribute_simd: 9530 case OMPD_distribute_parallel_for: 9531 case OMPD_distribute_parallel_for_simd: 9532 case OMPD_teams_distribute: 9533 case OMPD_teams_distribute_simd: 9534 case OMPD_teams_distribute_parallel_for: 9535 case OMPD_teams_distribute_parallel_for_simd: 9536 case OMPD_target_update: 9537 case OMPD_declare_simd: 9538 case OMPD_declare_variant: 9539 case OMPD_declare_target: 9540 case OMPD_end_declare_target: 9541 case OMPD_declare_reduction: 9542 case OMPD_declare_mapper: 9543 case OMPD_taskloop: 9544 case OMPD_taskloop_simd: 9545 case OMPD_master_taskloop: 9546 case OMPD_master_taskloop_simd: 9547 case OMPD_parallel_master_taskloop: 9548 case OMPD_parallel_master_taskloop_simd: 9549 case OMPD_requires: 9550 case OMPD_unknown: 9551 
llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9552 } 9553 return; 9554 } 9555 9556 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9557 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9558 return; 9559 9560 scanForTargetRegionsFunctions( 9561 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9562 return; 9563 } 9564 9565 // If this is a lambda function, look into its body. 9566 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9567 S = L->getBody(); 9568 9569 // Keep looking for target regions recursively. 9570 for (const Stmt *II : S->children()) 9571 scanForTargetRegionsFunctions(II, ParentName); 9572 } 9573 9574 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9575 // If emitting code for the host, we do not process FD here. Instead we do 9576 // the normal code generation. 9577 if (!CGM.getLangOpts().OpenMPIsDevice) { 9578 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9579 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9580 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9581 // Do not emit device_type(nohost) functions for the host. 9582 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9583 return true; 9584 } 9585 return false; 9586 } 9587 9588 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9589 // Try to detect target regions in the function. 9590 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9591 StringRef Name = CGM.getMangledName(GD); 9592 scanForTargetRegionsFunctions(FD->getBody(), Name); 9593 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9594 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9595 // Do not emit device_type(nohost) functions for the host. 9596 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9597 return true; 9598 } 9599 9600 // Do not to emit function if it is not marked as declare target. 9601 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9602 AlreadyEmittedTargetDecls.count(VD) == 0; 9603 } 9604 9605 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9606 if (!CGM.getLangOpts().OpenMPIsDevice) 9607 return false; 9608 9609 // Check if there are Ctors/Dtors in this declaration and look for target 9610 // regions in it. We use the complete variant to produce the kernel name 9611 // mangling. 9612 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9613 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9614 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9615 StringRef ParentName = 9616 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9617 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9618 } 9619 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9620 StringRef ParentName = 9621 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9622 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9623 } 9624 } 9625 9626 // Do not to emit variable if it is not marked as declare target. 
9627 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9628 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9629 cast<VarDecl>(GD.getDecl())); 9630 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9631 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9632 HasRequiresUnifiedSharedMemory)) { 9633 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9634 return true; 9635 } 9636 return false; 9637 } 9638 9639 llvm::Constant * 9640 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9641 const VarDecl *VD) { 9642 assert(VD->getType().isConstant(CGM.getContext()) && 9643 "Expected constant variable."); 9644 StringRef VarName; 9645 llvm::Constant *Addr; 9646 llvm::GlobalValue::LinkageTypes Linkage; 9647 QualType Ty = VD->getType(); 9648 SmallString<128> Buffer; 9649 { 9650 unsigned DeviceID; 9651 unsigned FileID; 9652 unsigned Line; 9653 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9654 FileID, Line); 9655 llvm::raw_svector_ostream OS(Buffer); 9656 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9657 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9658 VarName = OS.str(); 9659 } 9660 Linkage = llvm::GlobalValue::InternalLinkage; 9661 Addr = 9662 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9663 getDefaultFirstprivateAddressSpace()); 9664 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9665 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9666 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9667 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9668 VarName, Addr, VarSize, 9669 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9670 return Addr; 9671 } 9672 9673 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9674 llvm::Constant *Addr) { 9675 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9676 !CGM.getLangOpts().OpenMPIsDevice) 9677 return; 9678 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9679 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9680 if (!Res) { 9681 if (CGM.getLangOpts().OpenMPIsDevice) { 9682 // Register non-target variables being emitted in device code (debug info 9683 // may cause this). 9684 StringRef VarName = CGM.getMangledName(VD); 9685 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9686 } 9687 return; 9688 } 9689 // Register declare target variables. 9690 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9691 StringRef VarName; 9692 CharUnits VarSize; 9693 llvm::GlobalValue::LinkageTypes Linkage; 9694 9695 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9696 !HasRequiresUnifiedSharedMemory) { 9697 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9698 VarName = CGM.getMangledName(VD); 9699 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9700 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9701 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9702 } else { 9703 VarSize = CharUnits::Zero(); 9704 } 9705 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9706 // Temp solution to prevent optimizations of the internal variables. 
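    // E.g. for an internal declare target variable x, this emits roughly (a
    // sketch; the exact name is produced by getName below):
    // \code
    //   @x.ref = internal constant i32* @x  ; kept via llvm.compiler.used
    // \endcode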
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link', or 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::checkArchForUnifiedAddressing(
    const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      break;
    }
  }
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatched requires clauses across compilation units that
    // don't contain at least one target region.
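    // Conceptually, the generated registration function is just (a sketch;
    // the exact name comes from getName above):
    // \code
    //   void .omp_offloading.requires_reg(void) {
    //     __tgt_register_requires(Flags);
    //   }
    // \endcode
    // and it is run as a global initializer.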
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body
  // in between the runtime calls. This avoids duplicating the body code.
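  // The emitted structure is thus (illustrative, assuming an if clause and no
  // device pointer privatization):
  //   if (cond) __tgt_target_data_begin(dev, n, base_ptrs, ptrs, sizes, types);
  //   <body>
  //   if (cond) __tgt_target_data_end(dev, n, base_ptrs, ptrs, sizes, types);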
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then
  //      the CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or
  //      class type which is pass-by-value (except for the type that maps
  //      to the built-in complex data type), the characteristic data type
  //      is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
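  // Worked example (illustrative): when targeting AVX2 ('d' ISA, 256-bit
  // vector registers) with a CDT of double (64 bits),
  //   VLEN = 256 / 64 = 4.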
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for references marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers.

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get the Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// the parameters' OpenMP classification. The mangling function is defined
/// in section 3.5 of the AAVFABI.
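/// For example (illustrative): a uniform parameter, a linear parameter with
/// step 2, and a vector parameter aligned to 16 bytes mangle as "ul2va16".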
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
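      // For instance (illustrative): with NDS == 32 and a single vector
      // parameter, the not-inbranch case below produces the attributes
      // "_ZGVnN2v_<name>" and "_ZGVnN4v_<name>".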
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
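      // E.g. (illustrative): 'linear(p: 4)' marks p as Linear with stride 4,
      // while 'linear(p: s)' with a step naming another parameter becomes
      // LinearWithVarStride, recording that parameter's position instead.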
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Use dyn_cast so these checks are meaningful: a cast<> here
            // would assert instead of falling through on a mismatch.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
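/// On scope exit this re-emits the registered finalization runtime call
/// (__kmpc_doacross_fini below) with the captured location and thread id.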
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
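  // E.g. (illustrative): '#pragma omp for ordered(2)' yields a two-element
  // kmp_dim array; the loop below stores each loop's trip count into 'up'
  // and 1 into 'st', leaving 'lo' zero-initialized.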
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
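  // The calls emitted below have the shape (illustrative):
  //   void *ptr = __kmpc_alloc(gtid, size, allocator);
  //   ...uses of the variable...
  //   __kmpc_free(gtid, ptr, allocator);  // pushed as a cleanup below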
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}

namespace {
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace

/// Checks current context and returns true if it matches the context selector.
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  return false;
}

/// Checks for implementation={vendor(<vendor>)} context selector.
/// \returns true iff <vendor>="llvm", false otherwise.
template <>
bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
    const OMPContextSelectorData &Data) {
  return llvm::all_of(Data.Names,
                      [](StringRef S) { return !S.compare_lower("llvm"); });
}

/// Checks for device={kind(<kind>)} context selector.
/// \returns true if <kind>="host" and compilation is for host.
/// true if <kind>="nohost" and compilation is for device.
/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
/// false otherwise.
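/// E.g. (illustrative): device={kind(cpu)} matches a host compile targeting
/// x86_64, while device={kind(gpu)} matches an nvptx64 device compile.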
template <>
bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
    const OMPContextSelectorData &Data, CodeGenModule &CGM) {
  for (StringRef Name : Data.Names) {
    if (!Name.compare_lower("host")) {
      if (CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    if (!Name.compare_lower("nohost")) {
      if (!CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    switch (CGM.getTriple().getArch()) {
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::aarch64:
    case llvm::Triple::aarch64_be:
    case llvm::Triple::aarch64_32:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      if (Name.compare_lower("cpu"))
        return false;
      break;
    case llvm::Triple::amdgcn:
    case llvm::Triple::nvptx:
    case llvm::Triple::nvptx64:
      if (Name.compare_lower("gpu"))
        return false;
      break;
    case llvm::Triple::UnknownArch:
    case llvm::Triple::arc:
    case llvm::Triple::avr:
    case llvm::Triple::bpfel:
    case llvm::Triple::bpfeb:
    case llvm::Triple::hexagon:
    case llvm::Triple::mips:
    case llvm::Triple::mipsel:
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::msp430:
    case llvm::Triple::r600:
    case llvm::Triple::riscv32:
    case llvm::Triple::riscv64:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::sparcel:
    case llvm::Triple::systemz:
    case llvm::Triple::tce:
    case llvm::Triple::tcele:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::xcore:
    case llvm::Triple::le32:
    case llvm::Triple::le64:
    case llvm::Triple::amdil:
    case llvm::Triple::amdil64:
    case llvm::Triple::hsail:
    case llvm::Triple::hsail64:
    case llvm::Triple::spir:
    case llvm::Triple::spir64:
    case llvm::Triple::kalimba:
    case llvm::Triple::shave:
    case llvm::Triple::lanai:
    case llvm::Triple::wasm32:
    case llvm::Triple::wasm64:
    case llvm::Triple::renderscript32:
    case llvm::Triple::renderscript64:
    case llvm::Triple::ve:
      return false;
    }
  }
  return true;
}

static bool matchesContext(CodeGenModule &CGM,
                           const CompleteOMPContextSelectorData &ContextData) {
  for (const OMPContextSelectorData &Data : ContextData) {
    switch (Data.Ctx) {
    case OMP_CTX_vendor:
      assert(Data.CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
        return false;
      break;
    case OMP_CTX_kind:
      assert(Data.CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
              Data, CGM))
        return false;
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return true;
}

static CompleteOMPContextSelectorData
translateAttrToContextSelectorData(ASTContext &C,
                                   const OMPDeclareVariantAttr *A) {
  CompleteOMPContextSelectorData Data;
  for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
    Data.emplace_back();
    auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
        *std::next(A->ctxSelectorSets_begin(), I));
    auto Ctx = static_cast<OpenMPContextSelectorKind>(
        *std::next(A->ctxSelectors_begin(), I));
    Data.back().CtxSet = CtxSet;
    Data.back().Ctx = Ctx;
    const Expr *Score = *std::next(A->scores_begin(), I);
    Data.back().Score = Score->EvaluateKnownConstInt(C);
    switch (Ctx) {
    case OMP_CTX_vendor:
      assert(CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
      break;
    case OMP_CTX_kind:
      assert(CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return Data;
}

static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
                           const CompleteOMPContextSelectorData &RHS) {
  llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
  for (const OMPContextSelectorData &D : RHS) {
    auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
    Pair.getSecond().insert(D.Names.begin(), D.Names.end());
  }
  bool AllSetsAreEqual = true;
  for (const OMPContextSelectorData &D : LHS) {
    auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
    if (It == RHSData.end())
      return false;
    if (D.Names.size() > It->getSecond().size())
      return false;
    if (llvm::set_union(It->getSecond(), D.Names))
      return false;
    AllSetsAreEqual =
        AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
  }

  return LHS.size() != RHS.size() || !AllSetsAreEqual;
}

static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
                            const CompleteOMPContextSelectorData &RHS) {
  // Score is calculated as sum of all scores + 1.
  llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
  bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
  if (RHSIsSubsetOfLHS) {
    LHSScore = llvm::APSInt::get(0);
  } else {
    for (const OMPContextSelectorData &Data : LHS) {
      if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
        LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
      } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
        LHSScore += Data.Score.extend(LHSScore.getBitWidth());
      } else {
        LHSScore += Data.Score;
      }
    }
  }
  llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
  if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
    RHSScore = llvm::APSInt::get(0);
  } else {
    for (const OMPContextSelectorData &Data : RHS) {
      if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
        RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
      } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
        RHSScore += Data.Score.extend(RHSScore.getBitWidth());
      } else {
        RHSScore += Data.Score;
      }
    }
  }
  return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
}

/// Finds the variant function that matches current context with its context
/// selector.
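/// E.g. (illustrative): given
///   #pragma omp declare variant(foo_cpu) match(device={kind(cpu)})
/// a host x86_64 compile of foo() resolves to foo_cpu; when several variants
/// match, the one with the highest cumulative score is chosen.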
static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
                                                     const FunctionDecl *FD) {
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  CompleteOMPContextSelectorData TopMostData;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    CompleteOMPContextSelectorData Data =
        translateAttrToContextSelectorData(CGM.getContext(), A);
    if (!matchesContext(CGM, Data))
      continue;
    // If the attribute matches the context, find the attribute with the
    // highest score.
    if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
      TopMostAttr = A;
      TopMostData.swap(Data);
    }
  }
  if (!TopMostAttr)
    return FD;
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}

bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
  // Emit the original function if it does not have a declare variant
  // attribute or the context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
    DeferredVariantFunction.erase(D);
    return true;
  }
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

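// The RAII below tracks variables named in clauses such as (illustrative)
//   #pragma omp simd lastprivate(conditional: a)
// mapping each declaration to a unique global name that is later used for
// the "last value" and last-update-iteration bookkeeping.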
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                              [](const OMPLastprivateClause *C) {
                                return C->getKind() ==
                                       OMPC_LASTPRIVATE_conditional;
                              })) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqeName.try_emplace(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          generateUniqueName(CGM, "pl_cond", Ref));
    }
  }
  Data.IVLVal = IVLVal;
  // In simd-only mode or for simd directives there is no need to generate
  // threadprivate references for the loop iteration counter; we can use the
  // original one, since outlining cannot happen in simd regions.
  if (CGF.getLangOpts().OpenMPSimd ||
      isOpenMPSimdDirective(S.getDirectiveKind())) {
    Data.UseOriginalIV = true;
    return;
  }
  llvm::SmallString<16> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  PresumedLoc PLoc =
      CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
  Data.IVName = OS.str();
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
}

void CGOpenMPRuntime::initLastprivateConditionalCounter(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  if (CGM.getLangOpts().OpenMPSimd ||
      !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                    [](const OMPLastprivateClause *C) {
                      return C->getKind() == OMPC_LASTPRIVATE_conditional;
                    }))
    return;
  const CGOpenMPRuntime::LastprivateConditionalData &Data =
      LastprivateConditionalStack.back();
  if (Data.UseOriginalIV)
    return;
  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv = iv;
  Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
      CGF, Data.IVLVal.getType(), Data.IVName);
  LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
  CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
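/// E.g. (illustrative): when visiting the LHS of 'a = b;' inside a region
/// with lastprivate(conditional: a), the checker records the matched
/// expression, its declaration, the unique global name, and the IV info.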

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  CodeGenFunction &CGF;
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  SourceLocation Loc;
  bool UseOriginalIV = false;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CGF.IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      CodeGenFunction &CGF,
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : CGF(CGF), LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
                           UseOriginalIV);
  }
};
} // namespace

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
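
  // A sketch of the state maintained here (illustrative pseudo-code, not
  // emitted verbatim): for each tracked variable 'a', two internal globals
  // keyed by the unique "pl_cond..." name generated earlier are kept,
  //
  //   int<xx> pl_cond..._a$iv; // iteration that last assigned to 'a'
  //   T       pl_cond..._a;    // value assigned on that iteration
  //
  // and every store to the private copy 'priv_a' is followed by the guarded
  // update emitted below.
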
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // (last_iv <= global_iv): check whether the variable was assigned on this
    // or a later iteration and, if so, store the new value in the globals.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal =
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for the unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };
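
  // Illustrative note: under -fopenmp-simd only simd semantics are honored,
  // so a construct like
  //   #pragma omp parallel for lastprivate(conditional: a)
  // runs without threads; the guarded update cannot race and can be emitted
  // inline rather than inside a named critical region.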
  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region: no parallel region can be emitted in
    // simd-only mode.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->getSecond();
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region; there is nothing to copy back.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void
CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &OrderedOpGen,
                                       SourceLocation Loc, bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
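
// Illustrative note on the stubs above: CGOpenMPSIMDRuntime backs simd-only
// compilation (e.g. 'clang -fopenmp-simd'), in which only simd-related
// directives are honored and no OpenMP runtime calls are generated. For a
// hypothetical input such as
//
//   #pragma omp parallel for simd
//   for (int i = 0; i < N; ++i)
//     c[i] = a[i] + b[i];
//
// the loop is simply annotated for vectorization; nothing is outlined and no
// threads are spawned, so these entry points are unreachable by construction.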