1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 487 /// All descriptions are taken from 488 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 489 /// Original structure: 490 /// typedef struct ident { 491 /// kmp_int32 reserved_1; /**< might be used in Fortran; 492 /// see above */ 493 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 494 /// KMP_IDENT_KMPC identifies this union 495 /// member */ 496 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 497 /// see above */ 498 ///#if USE_ITT_BUILD 499 /// /* but currently used for storing 500 /// region-specific ITT */ 501 /// /* contextual information. */ 502 ///#endif /* USE_ITT_BUILD */ 503 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 504 /// C++ */ 505 /// char const *psource; /**< String describing the source location. 506 /// The string is composed of semi-colon separated 507 // fields which describe the source file, 508 /// the function and a pair of line numbers that 509 /// delimit the construct. 510 /// */ 511 /// } ident_t; 512 enum IdentFieldIndex { 513 /// might be used in Fortran 514 IdentField_Reserved_1, 515 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 516 IdentField_Flags, 517 /// Not really used in Fortran any more 518 IdentField_Reserved_2, 519 /// Source[4] in Fortran, do not use for C++ 520 IdentField_Reserved_3, 521 /// String describing the source location. The string is composed of 522 /// semi-colon separated fields which describe the source file, the function 523 /// and a pair of line numbers that delimit the construct. 524 IdentField_PSource 525 }; 526 527 /// Schedule types for 'omp for' loops (these enumerators are taken from 528 /// the enum sched_type in kmp.h). 529 enum OpenMPSchedType { 530 /// Lower bound for default (unordered) versions. 531 OMP_sch_lower = 32, 532 OMP_sch_static_chunked = 33, 533 OMP_sch_static = 34, 534 OMP_sch_dynamic_chunked = 35, 535 OMP_sch_guided_chunked = 36, 536 OMP_sch_runtime = 37, 537 OMP_sch_auto = 38, 538 /// static with chunk adjustment (e.g., simd) 539 OMP_sch_static_balanced_chunked = 45, 540 /// Lower bound for 'ordered' versions. 541 OMP_ord_lower = 64, 542 OMP_ord_static_chunked = 65, 543 OMP_ord_static = 66, 544 OMP_ord_dynamic_chunked = 67, 545 OMP_ord_guided_chunked = 68, 546 OMP_ord_runtime = 69, 547 OMP_ord_auto = 70, 548 OMP_sch_default = OMP_sch_static, 549 /// dist_schedule types 550 OMP_dist_sch_static_chunked = 91, 551 OMP_dist_sch_static = 92, 552 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 553 /// Set if the monotonic schedule modifier was present. 554 OMP_sch_modifier_monotonic = (1 << 29), 555 /// Set if the nonmonotonic schedule modifier was present. 556 OMP_sch_modifier_nonmonotonic = (1 << 30), 557 }; 558 559 enum OpenMPRTLFunction { 560 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 561 /// kmpc_micro microtask, ...); 562 OMPRTL__kmpc_fork_call, 563 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 564 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 565 OMPRTL__kmpc_threadprivate_cached, 566 /// Call to void __kmpc_threadprivate_register( ident_t *, 567 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 568 OMPRTL__kmpc_threadprivate_register, 569 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 570 OMPRTL__kmpc_global_thread_num, 571 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 572 // kmp_critical_name *crit); 573 OMPRTL__kmpc_critical, 574 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 575 // global_tid, kmp_critical_name *crit, uintptr_t hint); 576 OMPRTL__kmpc_critical_with_hint, 577 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 578 // kmp_critical_name *crit); 579 OMPRTL__kmpc_end_critical, 580 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 581 // global_tid); 582 OMPRTL__kmpc_cancel_barrier, 583 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 584 OMPRTL__kmpc_barrier, 585 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 586 OMPRTL__kmpc_for_static_fini, 587 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 588 // global_tid); 589 OMPRTL__kmpc_serialized_parallel, 590 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 591 // global_tid); 592 OMPRTL__kmpc_end_serialized_parallel, 593 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 594 // kmp_int32 num_threads); 595 OMPRTL__kmpc_push_num_threads, 596 // Call to void __kmpc_flush(ident_t *loc); 597 OMPRTL__kmpc_flush, 598 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 599 OMPRTL__kmpc_master, 600 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 601 OMPRTL__kmpc_end_master, 602 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 603 // int end_part); 604 OMPRTL__kmpc_omp_taskyield, 605 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 606 OMPRTL__kmpc_single, 607 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 608 OMPRTL__kmpc_end_single, 609 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 610 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 611 // kmp_routine_entry_t *task_entry); 612 OMPRTL__kmpc_omp_task_alloc, 613 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, 614 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, 615 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, 616 // kmp_int64 device_id); 617 OMPRTL__kmpc_omp_target_task_alloc, 618 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 619 // new_task); 620 OMPRTL__kmpc_omp_task, 621 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 622 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 623 // kmp_int32 didit); 624 OMPRTL__kmpc_copyprivate, 625 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 626 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 627 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 628 OMPRTL__kmpc_reduce, 629 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 630 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 631 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 632 // *lck); 633 OMPRTL__kmpc_reduce_nowait, 634 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 635 // kmp_critical_name *lck); 636 OMPRTL__kmpc_end_reduce, 637 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 638 // kmp_critical_name *lck); 639 OMPRTL__kmpc_end_reduce_nowait, 640 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 641 // kmp_task_t * new_task); 642 OMPRTL__kmpc_omp_task_begin_if0, 643 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 644 // kmp_task_t * new_task); 645 OMPRTL__kmpc_omp_task_complete_if0, 646 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 647 OMPRTL__kmpc_ordered, 648 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 649 OMPRTL__kmpc_end_ordered, 650 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 651 // global_tid); 652 OMPRTL__kmpc_omp_taskwait, 653 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 654 OMPRTL__kmpc_taskgroup, 655 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 656 OMPRTL__kmpc_end_taskgroup, 657 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 658 // int proc_bind); 659 OMPRTL__kmpc_push_proc_bind, 660 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 661 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 662 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 663 OMPRTL__kmpc_omp_task_with_deps, 664 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 665 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 666 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 667 OMPRTL__kmpc_omp_wait_deps, 668 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 669 // global_tid, kmp_int32 cncl_kind); 670 OMPRTL__kmpc_cancellationpoint, 671 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 672 // kmp_int32 cncl_kind); 673 OMPRTL__kmpc_cancel, 674 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 675 // kmp_int32 num_teams, kmp_int32 thread_limit); 676 OMPRTL__kmpc_push_num_teams, 677 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 678 // microtask, ...); 679 OMPRTL__kmpc_fork_teams, 680 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 681 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 682 // sched, kmp_uint64 grainsize, void *task_dup); 683 OMPRTL__kmpc_taskloop, 684 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 685 // num_dims, struct kmp_dim *dims); 686 OMPRTL__kmpc_doacross_init, 687 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 688 OMPRTL__kmpc_doacross_fini, 689 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 690 // *vec); 691 OMPRTL__kmpc_doacross_post, 692 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 693 // *vec); 694 OMPRTL__kmpc_doacross_wait, 695 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 696 // *data); 697 OMPRTL__kmpc_task_reduction_init, 698 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 699 // *d); 700 OMPRTL__kmpc_task_reduction_get_th_data, 701 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 702 OMPRTL__kmpc_alloc, 703 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 704 OMPRTL__kmpc_free, 705 706 // 707 // Offloading related calls 708 // 709 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 710 // size); 711 OMPRTL__kmpc_push_target_tripcount, 712 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 713 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 714 // *arg_types); 715 OMPRTL__tgt_target, 716 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 717 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 718 // *arg_types); 719 OMPRTL__tgt_target_nowait, 720 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 721 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 722 // *arg_types, int32_t num_teams, int32_t thread_limit); 723 OMPRTL__tgt_target_teams, 724 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 725 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 726 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 727 OMPRTL__tgt_target_teams_nowait, 728 // Call to void __tgt_register_requires(int64_t flags); 729 OMPRTL__tgt_register_requires, 730 // Call to void __tgt_register_lib(__tgt_bin_desc *desc); 731 OMPRTL__tgt_register_lib, 732 // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); 733 OMPRTL__tgt_unregister_lib, 734 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 735 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 736 OMPRTL__tgt_target_data_begin, 737 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 738 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 739 // *arg_types); 740 OMPRTL__tgt_target_data_begin_nowait, 741 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 742 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 743 OMPRTL__tgt_target_data_end, 744 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 745 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 746 // *arg_types); 747 OMPRTL__tgt_target_data_end_nowait, 748 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 749 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 750 OMPRTL__tgt_target_data_update, 751 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 752 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 753 // *arg_types); 754 OMPRTL__tgt_target_data_update_nowait, 755 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 756 OMPRTL__tgt_mapper_num_components, 757 // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void 758 // *base, void *begin, int64_t size, int64_t type); 759 OMPRTL__tgt_push_mapper_component, 760 }; 761 762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 763 /// region. 764 class CleanupTy final : public EHScopeStack::Cleanup { 765 PrePostActionTy *Action; 766 767 public: 768 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 769 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 770 if (!CGF.HaveInsertPoint()) 771 return; 772 Action->Exit(CGF); 773 } 774 }; 775 776 } // anonymous namespace 777 778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 779 CodeGenFunction::RunCleanupsScope Scope(CGF); 780 if (PrePostAction) { 781 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 782 Callback(CodeGen, CGF, *PrePostAction); 783 } else { 784 PrePostActionTy Action; 785 Callback(CodeGen, CGF, Action); 786 } 787 } 788 789 /// Check if the combiner is a call to UDR combiner and if it is so return the 790 /// UDR decl used for reduction. 791 static const OMPDeclareReductionDecl * 792 getReductionInit(const Expr *ReductionOp) { 793 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 794 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 795 if (const auto *DRE = 796 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 797 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 798 return DRD; 799 return nullptr; 800 } 801 802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 803 const OMPDeclareReductionDecl *DRD, 804 const Expr *InitOp, 805 Address Private, Address Original, 806 QualType Ty) { 807 if (DRD->getInitializer()) { 808 std::pair<llvm::Function *, llvm::Function *> Reduction = 809 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 810 const auto *CE = cast<CallExpr>(InitOp); 811 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 812 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 813 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 814 const auto *LHSDRE = 815 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 816 const auto *RHSDRE = 817 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 818 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 819 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 820 [=]() { return Private; }); 821 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 822 [=]() { return Original; }); 823 (void)PrivateScope.Privatize(); 824 RValue Func = RValue::get(Reduction.second); 825 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 826 CGF.EmitIgnoredExpr(InitOp); 827 } else { 828 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 829 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 830 auto *GV = new llvm::GlobalVariable( 831 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 832 llvm::GlobalValue::PrivateLinkage, Init, Name); 833 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 834 RValue InitRVal; 835 switch (CGF.getEvaluationKind(Ty)) { 836 case TEK_Scalar: 837 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 838 break; 839 case TEK_Complex: 840 InitRVal = 841 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 842 break; 843 case TEK_Aggregate: 844 InitRVal = RValue::getAggregate(LV.getAddress()); 845 break; 846 } 847 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 849 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 850 /*IsInitializer=*/false); 851 } 852 } 853 854 /// Emit initialization of arrays of complex types. 855 /// \param DestAddr Address of the array. 856 /// \param Type Type of array. 857 /// \param Init Initial expression of array. 858 /// \param SrcAddr Address of the original array. 859 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 860 QualType Type, bool EmitDeclareReductionInit, 861 const Expr *Init, 862 const OMPDeclareReductionDecl *DRD, 863 Address SrcAddr = Address::invalid()) { 864 // Perform element-by-element initialization. 865 QualType ElementTy; 866 867 // Drill down to the base element type on both arrays. 868 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 869 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 870 DestAddr = 871 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 872 if (DRD) 873 SrcAddr = 874 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 875 876 llvm::Value *SrcBegin = nullptr; 877 if (DRD) 878 SrcBegin = SrcAddr.getPointer(); 879 llvm::Value *DestBegin = DestAddr.getPointer(); 880 // Cast from pointer to array type to pointer to single element. 881 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 882 // The basic structure here is a while-do loop. 883 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 884 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 885 llvm::Value *IsEmpty = 886 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 887 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 888 889 // Enter the loop body, making that address the current address. 890 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 891 CGF.EmitBlock(BodyBB); 892 893 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 894 895 llvm::PHINode *SrcElementPHI = nullptr; 896 Address SrcElementCurrent = Address::invalid(); 897 if (DRD) { 898 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 899 "omp.arraycpy.srcElementPast"); 900 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 901 SrcElementCurrent = 902 Address(SrcElementPHI, 903 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 904 } 905 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 906 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 907 DestElementPHI->addIncoming(DestBegin, EntryBB); 908 Address DestElementCurrent = 909 Address(DestElementPHI, 910 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 911 912 // Emit copy. 913 { 914 CodeGenFunction::RunCleanupsScope InitScope(CGF); 915 if (EmitDeclareReductionInit) { 916 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 917 SrcElementCurrent, ElementTy); 918 } else 919 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 920 /*IsInitializer=*/false); 921 } 922 923 if (DRD) { 924 // Shift the address forward by one element. 925 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 926 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 927 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 928 } 929 930 // Shift the address forward by one element. 931 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 932 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 933 // Check whether we've reached the end. 934 llvm::Value *Done = 935 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 936 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 937 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 938 939 // Done. 940 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 941 } 942 943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 944 return CGF.EmitOMPSharedLValue(E); 945 } 946 947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 948 const Expr *E) { 949 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 950 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 951 return LValue(); 952 } 953 954 void ReductionCodeGen::emitAggregateInitialization( 955 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 956 const OMPDeclareReductionDecl *DRD) { 957 // Emit VarDecl with copy init for arrays. 958 // Get the address of the original variable captured in current 959 // captured region. 960 const auto *PrivateVD = 961 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 962 bool EmitDeclareReductionInit = 963 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 964 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 965 EmitDeclareReductionInit, 966 EmitDeclareReductionInit ? ClausesData[N].ReductionOp 967 : PrivateVD->getInit(), 968 DRD, SharedLVal.getAddress()); 969 } 970 971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 972 ArrayRef<const Expr *> Privates, 973 ArrayRef<const Expr *> ReductionOps) { 974 ClausesData.reserve(Shareds.size()); 975 SharedAddresses.reserve(Shareds.size()); 976 Sizes.reserve(Shareds.size()); 977 BaseDecls.reserve(Shareds.size()); 978 auto IPriv = Privates.begin(); 979 auto IRed = ReductionOps.begin(); 980 for (const Expr *Ref : Shareds) { 981 ClausesData.emplace_back(Ref, *IPriv, *IRed); 982 std::advance(IPriv, 1); 983 std::advance(IRed, 1); 984 } 985 } 986 987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 988 assert(SharedAddresses.size() == N && 989 "Number of generated lvalues must be exactly N."); 990 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 991 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 992 SharedAddresses.emplace_back(First, Second); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 996 const auto *PrivateVD = 997 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 998 QualType PrivateType = PrivateVD->getType(); 999 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 Sizes.emplace_back( 1002 CGF.getTypeSize( 1003 SharedAddresses[N].first.getType().getNonReferenceType()), 1004 nullptr); 1005 return; 1006 } 1007 llvm::Value *Size; 1008 llvm::Value *SizeInChars; 1009 auto *ElemType = 1010 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 1011 ->getElementType(); 1012 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1013 if (AsArraySection) { 1014 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 1015 SharedAddresses[N].first.getPointer()); 1016 Size = CGF.Builder.CreateNUWAdd( 1017 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1018 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1019 } else { 1020 SizeInChars = CGF.getTypeSize( 1021 SharedAddresses[N].first.getType().getNonReferenceType()); 1022 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1023 } 1024 Sizes.emplace_back(SizeInChars, Size); 1025 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1026 CGF, 1027 cast<OpaqueValueExpr>( 1028 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1029 RValue::get(Size)); 1030 CGF.EmitVariablyModifiedType(PrivateType); 1031 } 1032 1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1034 llvm::Value *Size) { 1035 const auto *PrivateVD = 1036 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1037 QualType PrivateType = PrivateVD->getType(); 1038 if (!PrivateType->isVariablyModifiedType()) { 1039 assert(!Size && !Sizes[N].second && 1040 "Size should be nullptr for non-variably modified reduction " 1041 "items."); 1042 return; 1043 } 1044 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1045 CGF, 1046 cast<OpaqueValueExpr>( 1047 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1048 RValue::get(Size)); 1049 CGF.EmitVariablyModifiedType(PrivateType); 1050 } 1051 1052 void ReductionCodeGen::emitInitialization( 1053 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1054 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1055 assert(SharedAddresses.size() > N && "No variable was generated"); 1056 const auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 const OMPDeclareReductionDecl *DRD = 1059 getReductionInit(ClausesData[N].ReductionOp); 1060 QualType PrivateType = PrivateVD->getType(); 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 QualType SharedType = SharedAddresses[N].first.getType(); 1064 SharedLVal = CGF.MakeAddrLValue( 1065 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1066 CGF.ConvertTypeForMem(SharedType)), 1067 SharedType, SharedAddresses[N].first.getBaseInfo(), 1068 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1069 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1070 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1071 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1072 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1073 PrivateAddr, SharedLVal.getAddress(), 1074 SharedLVal.getType()); 1075 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1076 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1077 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1078 PrivateVD->getType().getQualifiers(), 1079 /*IsInitializer=*/false); 1080 } 1081 } 1082 1083 bool ReductionCodeGen::needCleanups(unsigned N) { 1084 const auto *PrivateVD = 1085 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1086 QualType PrivateType = PrivateVD->getType(); 1087 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1088 return DTorKind != QualType::DK_none; 1089 } 1090 1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1092 Address PrivateAddr) { 1093 const auto *PrivateVD = 1094 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1095 QualType PrivateType = PrivateVD->getType(); 1096 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1097 if (needCleanups(N)) { 1098 PrivateAddr = CGF.Builder.CreateElementBitCast( 1099 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1100 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1101 } 1102 } 1103 1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1105 LValue BaseLV) { 1106 BaseTy = BaseTy.getNonReferenceType(); 1107 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1108 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1109 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1110 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1111 } else { 1112 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1113 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1114 } 1115 BaseTy = BaseTy->getPointeeType(); 1116 } 1117 return CGF.MakeAddrLValue( 1118 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1119 CGF.ConvertTypeForMem(ElTy)), 1120 BaseLV.getType(), BaseLV.getBaseInfo(), 1121 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1122 } 1123 1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1125 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1126 llvm::Value *Addr) { 1127 Address Tmp = Address::invalid(); 1128 Address TopTmp = Address::invalid(); 1129 Address MostTopTmp = Address::invalid(); 1130 BaseTy = BaseTy.getNonReferenceType(); 1131 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1132 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1133 Tmp = CGF.CreateMemTemp(BaseTy); 1134 if (TopTmp.isValid()) 1135 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1136 else 1137 MostTopTmp = Tmp; 1138 TopTmp = Tmp; 1139 BaseTy = BaseTy->getPointeeType(); 1140 } 1141 llvm::Type *Ty = BaseLVType; 1142 if (Tmp.isValid()) 1143 Ty = Tmp.getElementType(); 1144 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1145 if (Tmp.isValid()) { 1146 CGF.Builder.CreateStore(Addr, Tmp); 1147 return MostTopTmp; 1148 } 1149 return Address(Addr, BaseLVAlignment); 1150 } 1151 1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1153 const VarDecl *OrigVD = nullptr; 1154 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1155 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1156 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1157 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1158 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1159 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1160 DE = cast<DeclRefExpr>(Base); 1161 OrigVD = cast<VarDecl>(DE->getDecl()); 1162 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1163 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } 1169 return OrigVD; 1170 } 1171 1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1173 Address PrivateAddr) { 1174 const DeclRefExpr *DE; 1175 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1176 BaseDecls.emplace_back(OrigVD); 1177 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1178 LValue BaseLValue = 1179 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1180 OriginalBaseLValue); 1181 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1182 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1183 llvm::Value *PrivatePointer = 1184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1185 PrivateAddr.getPointer(), 1186 SharedAddresses[N].first.getAddress().getType()); 1187 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1188 return castToBase(CGF, OrigVD->getType(), 1189 SharedAddresses[N].first.getType(), 1190 OriginalBaseLValue.getAddress().getType(), 1191 OriginalBaseLValue.getAlignment(), Ptr); 1192 } 1193 BaseDecls.emplace_back( 1194 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1195 return PrivateAddr; 1196 } 1197 1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1199 const OMPDeclareReductionDecl *DRD = 1200 getReductionInit(ClausesData[N].ReductionOp); 1201 return DRD && DRD->getInitializer(); 1202 } 1203 1204 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1205 return CGF.EmitLoadOfPointerLValue( 1206 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1207 getThreadIDVariable()->getType()->castAs<PointerType>()); 1208 } 1209 1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1211 if (!CGF.HaveInsertPoint()) 1212 return; 1213 // 1.2.2 OpenMP Language Terminology 1214 // Structured block - An executable statement with a single entry at the 1215 // top and a single exit at the bottom. 1216 // The point of exit cannot be a branch out of the structured block. 1217 // longjmp() and throw() must not violate the entry/exit criteria. 1218 CGF.EHStack.pushTerminate(); 1219 CodeGen(CGF); 1220 CGF.EHStack.popTerminate(); 1221 } 1222 1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1224 CodeGenFunction &CGF) { 1225 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1226 getThreadIDVariable()->getType(), 1227 AlignmentSource::Decl); 1228 } 1229 1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1231 QualType FieldTy) { 1232 auto *Field = FieldDecl::Create( 1233 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1234 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1235 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1236 Field->setAccess(AS_public); 1237 DC->addDecl(Field); 1238 return Field; 1239 } 1240 1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1242 StringRef Separator) 1243 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1244 OffloadEntriesInfoManager(CGM) { 1245 ASTContext &C = CGM.getContext(); 1246 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1247 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1248 RD->startDefinition(); 1249 // reserved_1 1250 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1251 // flags 1252 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1253 // reserved_2 1254 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1255 // reserved_3 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // psource 1258 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1259 RD->completeDefinition(); 1260 IdentQTy = C.getRecordType(RD); 1261 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1262 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1263 1264 loadOffloadInfoMetadata(); 1265 } 1266 1267 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1268 const GlobalDecl &OldGD, 1269 llvm::GlobalValue *OrigAddr, 1270 bool IsForDefinition) { 1271 // Emit at least a definition for the aliasee if the the address of the 1272 // original function is requested. 1273 if (IsForDefinition || OrigAddr) 1274 (void)CGM.GetAddrOfGlobal(NewGD); 1275 StringRef NewMangledName = CGM.getMangledName(NewGD); 1276 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1277 if (Addr && !Addr->isDeclaration()) { 1278 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1279 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); 1280 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1281 1282 // Create a reference to the named value. This ensures that it is emitted 1283 // if a deferred decl. 1284 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1285 1286 // Create the new alias itself, but don't set a name yet. 1287 auto *GA = 1288 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1289 1290 if (OrigAddr) { 1291 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1292 1293 GA->takeName(OrigAddr); 1294 OrigAddr->replaceAllUsesWith( 1295 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1296 OrigAddr->eraseFromParent(); 1297 } else { 1298 GA->setName(CGM.getMangledName(OldGD)); 1299 } 1300 1301 // Set attributes which are particular to an alias; this is a 1302 // specialization of the attributes which may be set on a global function. 1303 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1304 D->isWeakImported()) 1305 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1306 1307 CGM.SetCommonAttributes(OldGD, GA); 1308 return true; 1309 } 1310 return false; 1311 } 1312 1313 void CGOpenMPRuntime::clear() { 1314 InternalVars.clear(); 1315 // Clean non-target variable declarations possibly used only in debug info. 1316 for (const auto &Data : EmittedNonTargetVariables) { 1317 if (!Data.getValue().pointsToAliveValue()) 1318 continue; 1319 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1320 if (!GV) 1321 continue; 1322 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1323 continue; 1324 GV->eraseFromParent(); 1325 } 1326 // Emit aliases for the deferred aliasees. 1327 for (const auto &Pair : DeferredVariantFunction) { 1328 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1329 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1330 // If not able to emit alias, just emit original declaration. 1331 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1332 /*IsForDefinition=*/false); 1333 } 1334 } 1335 1336 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1337 SmallString<128> Buffer; 1338 llvm::raw_svector_ostream OS(Buffer); 1339 StringRef Sep = FirstSeparator; 1340 for (StringRef Part : Parts) { 1341 OS << Sep << Part; 1342 Sep = Separator; 1343 } 1344 return OS.str(); 1345 } 1346 1347 static llvm::Function * 1348 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1349 const Expr *CombinerInitializer, const VarDecl *In, 1350 const VarDecl *Out, bool IsCombiner) { 1351 // void .omp_combiner.(Ty *in, Ty *out); 1352 ASTContext &C = CGM.getContext(); 1353 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1354 FunctionArgList Args; 1355 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1356 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1357 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1358 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1359 Args.push_back(&OmpOutParm); 1360 Args.push_back(&OmpInParm); 1361 const CGFunctionInfo &FnInfo = 1362 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1363 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1364 std::string Name = CGM.getOpenMPRuntime().getName( 1365 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1366 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1367 Name, &CGM.getModule()); 1368 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1369 if (CGM.getLangOpts().Optimize) { 1370 Fn->removeFnAttr(llvm::Attribute::NoInline); 1371 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1372 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1373 } 1374 CodeGenFunction CGF(CGM); 1375 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1376 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1377 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1378 Out->getLocation()); 1379 CodeGenFunction::OMPPrivateScope Scope(CGF); 1380 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1381 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1382 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1383 .getAddress(); 1384 }); 1385 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1386 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1387 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1388 .getAddress(); 1389 }); 1390 (void)Scope.Privatize(); 1391 if (!IsCombiner && Out->hasInit() && 1392 !CGF.isTrivialInitializer(Out->getInit())) { 1393 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1394 Out->getType().getQualifiers(), 1395 /*IsInitializer=*/true); 1396 } 1397 if (CombinerInitializer) 1398 CGF.EmitIgnoredExpr(CombinerInitializer); 1399 Scope.ForceCleanup(); 1400 CGF.FinishFunction(); 1401 return Fn; 1402 } 1403 1404 void CGOpenMPRuntime::emitUserDefinedReduction( 1405 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1406 if (UDRMap.count(D) > 0) 1407 return; 1408 llvm::Function *Combiner = emitCombinerOrInitializer( 1409 CGM, D->getType(), D->getCombiner(), 1410 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1411 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1412 /*IsCombiner=*/true); 1413 llvm::Function *Initializer = nullptr; 1414 if (const Expr *Init = D->getInitializer()) { 1415 Initializer = emitCombinerOrInitializer( 1416 CGM, D->getType(), 1417 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1418 : nullptr, 1419 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1420 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1421 /*IsCombiner=*/false); 1422 } 1423 UDRMap.try_emplace(D, Combiner, Initializer); 1424 if (CGF) { 1425 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1426 Decls.second.push_back(D); 1427 } 1428 } 1429 1430 std::pair<llvm::Function *, llvm::Function *> 1431 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1432 auto I = UDRMap.find(D); 1433 if (I != UDRMap.end()) 1434 return I->second; 1435 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1436 return UDRMap.lookup(D); 1437 } 1438 1439 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1440 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1441 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1442 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1443 assert(ThreadIDVar->getType()->isPointerType() && 1444 "thread id variable must be of type kmp_int32 *"); 1445 CodeGenFunction CGF(CGM, true); 1446 bool HasCancel = false; 1447 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1448 HasCancel = OPD->hasCancel(); 1449 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1450 HasCancel = OPSD->hasCancel(); 1451 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1452 HasCancel = OPFD->hasCancel(); 1453 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1454 HasCancel = OPFD->hasCancel(); 1455 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1456 HasCancel = OPFD->hasCancel(); 1457 else if (const auto *OPFD = 1458 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1459 HasCancel = OPFD->hasCancel(); 1460 else if (const auto *OPFD = 1461 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1462 HasCancel = OPFD->hasCancel(); 1463 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1464 HasCancel, OutlinedHelperName); 1465 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1466 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1467 } 1468 1469 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1470 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1471 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1472 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1473 return emitParallelOrTeamsOutlinedFunction( 1474 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1475 } 1476 1477 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1478 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1479 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1480 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1481 return emitParallelOrTeamsOutlinedFunction( 1482 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1483 } 1484 1485 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1486 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1487 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1488 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1489 bool Tied, unsigned &NumberOfParts) { 1490 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1491 PrePostActionTy &) { 1492 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1493 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1494 llvm::Value *TaskArgs[] = { 1495 UpLoc, ThreadID, 1496 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1497 TaskTVar->getType()->castAs<PointerType>()) 1498 .getPointer()}; 1499 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1500 }; 1501 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1502 UntiedCodeGen); 1503 CodeGen.setAction(Action); 1504 assert(!ThreadIDVar->getType()->isPointerType() && 1505 "thread id variable must be of type kmp_int32 for tasks"); 1506 const OpenMPDirectiveKind Region = 1507 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1508 : OMPD_task; 1509 const CapturedStmt *CS = D.getCapturedStmt(Region); 1510 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1511 CodeGenFunction CGF(CGM, true); 1512 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1513 InnermostKind, 1514 TD ? TD->hasCancel() : false, Action); 1515 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1516 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1517 if (!Tied) 1518 NumberOfParts = Action.getNumberOfParts(); 1519 return Res; 1520 } 1521 1522 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1523 const RecordDecl *RD, const CGRecordLayout &RL, 1524 ArrayRef<llvm::Constant *> Data) { 1525 llvm::StructType *StructTy = RL.getLLVMType(); 1526 unsigned PrevIdx = 0; 1527 ConstantInitBuilder CIBuilder(CGM); 1528 auto DI = Data.begin(); 1529 for (const FieldDecl *FD : RD->fields()) { 1530 unsigned Idx = RL.getLLVMFieldNo(FD); 1531 // Fill the alignment. 1532 for (unsigned I = PrevIdx; I < Idx; ++I) 1533 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1534 PrevIdx = Idx + 1; 1535 Fields.add(*DI); 1536 ++DI; 1537 } 1538 } 1539 1540 template <class... As> 1541 static llvm::GlobalVariable * 1542 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1543 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1544 As &&... Args) { 1545 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1546 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1547 ConstantInitBuilder CIBuilder(CGM); 1548 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1549 buildStructValue(Fields, CGM, RD, RL, Data); 1550 return Fields.finishAndCreateGlobal( 1551 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1552 std::forward<As>(Args)...); 1553 } 1554 1555 template <typename T> 1556 static void 1557 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1558 ArrayRef<llvm::Constant *> Data, 1559 T &Parent) { 1560 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1561 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1562 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1563 buildStructValue(Fields, CGM, RD, RL, Data); 1564 Fields.finishAndAddTo(Parent); 1565 } 1566 1567 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1568 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1569 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1570 FlagsTy FlagsKey(Flags, Reserved2Flags); 1571 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1572 if (!Entry) { 1573 if (!DefaultOpenMPPSource) { 1574 // Initialize default location for psource field of ident_t structure of 1575 // all ident_t objects. Format is ";file;function;line;column;;". 1576 // Taken from 1577 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1578 DefaultOpenMPPSource = 1579 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1580 DefaultOpenMPPSource = 1581 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1582 } 1583 1584 llvm::Constant *Data[] = { 1585 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1586 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1587 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1588 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1589 llvm::GlobalValue *DefaultOpenMPLocation = 1590 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1591 llvm::GlobalValue::PrivateLinkage); 1592 DefaultOpenMPLocation->setUnnamedAddr( 1593 llvm::GlobalValue::UnnamedAddr::Global); 1594 1595 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1596 } 1597 return Address(Entry, Align); 1598 } 1599 1600 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1601 bool AtCurrentPoint) { 1602 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1603 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1604 1605 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1606 if (AtCurrentPoint) { 1607 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1608 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1609 } else { 1610 Elem.second.ServiceInsertPt = 1611 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1612 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1613 } 1614 } 1615 1616 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1617 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1618 if (Elem.second.ServiceInsertPt) { 1619 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1620 Elem.second.ServiceInsertPt = nullptr; 1621 Ptr->eraseFromParent(); 1622 } 1623 } 1624 1625 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1626 SourceLocation Loc, 1627 unsigned Flags) { 1628 Flags |= OMP_IDENT_KMPC; 1629 // If no debug info is generated - return global default location. 1630 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1631 Loc.isInvalid()) 1632 return getOrCreateDefaultLocation(Flags).getPointer(); 1633 1634 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1635 1636 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1637 Address LocValue = Address::invalid(); 1638 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1639 if (I != OpenMPLocThreadIDMap.end()) 1640 LocValue = Address(I->second.DebugLoc, Align); 1641 1642 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1643 // GetOpenMPThreadID was called before this routine. 1644 if (!LocValue.isValid()) { 1645 // Generate "ident_t .kmpc_loc.addr;" 1646 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1647 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1648 Elem.second.DebugLoc = AI.getPointer(); 1649 LocValue = AI; 1650 1651 if (!Elem.second.ServiceInsertPt) 1652 setLocThreadIdInsertPt(CGF); 1653 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1654 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1655 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1656 CGF.getTypeSize(IdentQTy)); 1657 } 1658 1659 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1660 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1661 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1662 LValue PSource = 1663 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1664 1665 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1666 if (OMPDebugLoc == nullptr) { 1667 SmallString<128> Buffer2; 1668 llvm::raw_svector_ostream OS2(Buffer2); 1669 // Build debug location 1670 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1671 OS2 << ";" << PLoc.getFilename() << ";"; 1672 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1673 OS2 << FD->getQualifiedNameAsString(); 1674 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1675 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1676 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1677 } 1678 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1679 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1680 1681 // Our callers always pass this to a runtime function, so for 1682 // convenience, go ahead and return a naked pointer. 1683 return LocValue.getPointer(); 1684 } 1685 1686 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1687 SourceLocation Loc) { 1688 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1689 1690 llvm::Value *ThreadID = nullptr; 1691 // Check whether we've already cached a load of the thread id in this 1692 // function. 1693 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1694 if (I != OpenMPLocThreadIDMap.end()) { 1695 ThreadID = I->second.ThreadID; 1696 if (ThreadID != nullptr) 1697 return ThreadID; 1698 } 1699 // If exceptions are enabled, do not use parameter to avoid possible crash. 1700 if (auto *OMPRegionInfo = 1701 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1702 if (OMPRegionInfo->getThreadIDVariable()) { 1703 // Check if this an outlined function with thread id passed as argument. 1704 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1705 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1706 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1707 !CGF.getLangOpts().CXXExceptions || 1708 CGF.Builder.GetInsertBlock() == TopBlock || 1709 !isa<llvm::Instruction>(LVal.getPointer()) || 1710 cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || 1711 cast<llvm::Instruction>(LVal.getPointer())->getParent() == 1712 CGF.Builder.GetInsertBlock()) { 1713 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1714 // If value loaded in entry block, cache it and use it everywhere in 1715 // function. 1716 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1717 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1718 Elem.second.ThreadID = ThreadID; 1719 } 1720 return ThreadID; 1721 } 1722 } 1723 } 1724 1725 // This is not an outlined function region - need to call __kmpc_int32 1726 // kmpc_global_thread_num(ident_t *loc). 1727 // Generate thread id value and cache this value for use across the 1728 // function. 1729 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1730 if (!Elem.second.ServiceInsertPt) 1731 setLocThreadIdInsertPt(CGF); 1732 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1733 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1734 llvm::CallInst *Call = CGF.Builder.CreateCall( 1735 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1736 emitUpdateLocation(CGF, Loc)); 1737 Call->setCallingConv(CGF.getRuntimeCC()); 1738 Elem.second.ThreadID = Call; 1739 return Call; 1740 } 1741 1742 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1743 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1744 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1745 clearLocThreadIdInsertPt(CGF); 1746 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1747 } 1748 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1749 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1750 UDRMap.erase(D); 1751 FunctionUDRMap.erase(CGF.CurFn); 1752 } 1753 auto I = FunctionUDMMap.find(CGF.CurFn); 1754 if (I != FunctionUDMMap.end()) { 1755 for(auto *D : I->second) 1756 UDMMap.erase(D); 1757 FunctionUDMMap.erase(I); 1758 } 1759 } 1760 1761 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1762 return IdentTy->getPointerTo(); 1763 } 1764 1765 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1766 if (!Kmpc_MicroTy) { 1767 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1768 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1769 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1770 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1771 } 1772 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1773 } 1774 1775 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1776 llvm::FunctionCallee RTLFn = nullptr; 1777 switch (static_cast<OpenMPRTLFunction>(Function)) { 1778 case OMPRTL__kmpc_fork_call: { 1779 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1780 // microtask, ...); 1781 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1782 getKmpc_MicroPointerTy()}; 1783 auto *FnTy = 1784 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1785 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1786 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1787 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1788 llvm::LLVMContext &Ctx = F->getContext(); 1789 llvm::MDBuilder MDB(Ctx); 1790 // Annotate the callback behavior of the __kmpc_fork_call: 1791 // - The callback callee is argument number 2 (microtask). 1792 // - The first two arguments of the callback callee are unknown (-1). 1793 // - All variadic arguments to the __kmpc_fork_call are passed to the 1794 // callback callee. 1795 F->addMetadata( 1796 llvm::LLVMContext::MD_callback, 1797 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1798 2, {-1, -1}, 1799 /* VarArgsArePassed */ true)})); 1800 } 1801 } 1802 break; 1803 } 1804 case OMPRTL__kmpc_global_thread_num: { 1805 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1806 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1807 auto *FnTy = 1808 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1809 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1810 break; 1811 } 1812 case OMPRTL__kmpc_threadprivate_cached: { 1813 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1814 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1816 CGM.VoidPtrTy, CGM.SizeTy, 1817 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1818 auto *FnTy = 1819 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1820 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1821 break; 1822 } 1823 case OMPRTL__kmpc_critical: { 1824 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1825 // kmp_critical_name *crit); 1826 llvm::Type *TypeParams[] = { 1827 getIdentTyPointerTy(), CGM.Int32Ty, 1828 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1829 auto *FnTy = 1830 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1831 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1832 break; 1833 } 1834 case OMPRTL__kmpc_critical_with_hint: { 1835 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1836 // kmp_critical_name *crit, uintptr_t hint); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1839 CGM.IntPtrTy}; 1840 auto *FnTy = 1841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1842 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1843 break; 1844 } 1845 case OMPRTL__kmpc_threadprivate_register: { 1846 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1847 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1848 // typedef void *(*kmpc_ctor)(void *); 1849 auto *KmpcCtorTy = 1850 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1851 /*isVarArg*/ false)->getPointerTo(); 1852 // typedef void *(*kmpc_cctor)(void *, void *); 1853 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1854 auto *KmpcCopyCtorTy = 1855 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1856 /*isVarArg*/ false) 1857 ->getPointerTo(); 1858 // typedef void (*kmpc_dtor)(void *); 1859 auto *KmpcDtorTy = 1860 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1861 ->getPointerTo(); 1862 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1863 KmpcCopyCtorTy, KmpcDtorTy}; 1864 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1865 /*isVarArg*/ false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_critical: { 1870 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1871 // kmp_critical_name *crit); 1872 llvm::Type *TypeParams[] = { 1873 getIdentTyPointerTy(), CGM.Int32Ty, 1874 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1875 auto *FnTy = 1876 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1877 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1878 break; 1879 } 1880 case OMPRTL__kmpc_cancel_barrier: { 1881 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1882 // global_tid); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1884 auto *FnTy = 1885 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1886 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1887 break; 1888 } 1889 case OMPRTL__kmpc_barrier: { 1890 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1891 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1892 auto *FnTy = 1893 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1894 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1895 break; 1896 } 1897 case OMPRTL__kmpc_for_static_fini: { 1898 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1899 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1900 auto *FnTy = 1901 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1902 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1903 break; 1904 } 1905 case OMPRTL__kmpc_push_num_threads: { 1906 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1907 // kmp_int32 num_threads) 1908 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1909 CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_serialized_parallel: { 1916 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1917 // global_tid); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1919 auto *FnTy = 1920 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1921 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1922 break; 1923 } 1924 case OMPRTL__kmpc_end_serialized_parallel: { 1925 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1926 // global_tid); 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1928 auto *FnTy = 1929 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1930 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1931 break; 1932 } 1933 case OMPRTL__kmpc_flush: { 1934 // Build void __kmpc_flush(ident_t *loc); 1935 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1936 auto *FnTy = 1937 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1938 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1939 break; 1940 } 1941 case OMPRTL__kmpc_master: { 1942 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_end_master: { 1950 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1951 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1952 auto *FnTy = 1953 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1954 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1955 break; 1956 } 1957 case OMPRTL__kmpc_omp_taskyield: { 1958 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1959 // int end_part); 1960 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1961 auto *FnTy = 1962 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1964 break; 1965 } 1966 case OMPRTL__kmpc_single: { 1967 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1968 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1969 auto *FnTy = 1970 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1971 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1972 break; 1973 } 1974 case OMPRTL__kmpc_end_single: { 1975 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1976 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1977 auto *FnTy = 1978 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1979 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1980 break; 1981 } 1982 case OMPRTL__kmpc_omp_task_alloc: { 1983 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1984 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1985 // kmp_routine_entry_t *task_entry); 1986 assert(KmpRoutineEntryPtrTy != nullptr && 1987 "Type kmp_routine_entry_t must be created."); 1988 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1989 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1990 // Return void * and then cast to particular kmp_task_t type. 1991 auto *FnTy = 1992 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1993 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1994 break; 1995 } 1996 case OMPRTL__kmpc_omp_target_task_alloc: { 1997 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1998 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1999 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2000 assert(KmpRoutineEntryPtrTy != nullptr && 2001 "Type kmp_routine_entry_t must be created."); 2002 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2003 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2004 CGM.Int64Ty}; 2005 // Return void * and then cast to particular kmp_task_t type. 2006 auto *FnTy = 2007 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2009 break; 2010 } 2011 case OMPRTL__kmpc_omp_task: { 2012 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2013 // *new_task); 2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2015 CGM.VoidPtrTy}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_copyprivate: { 2022 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2023 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2024 // kmp_int32 didit); 2025 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2026 auto *CpyFnTy = 2027 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2029 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2030 CGM.Int32Ty}; 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_reduce: { 2037 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2038 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2039 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2040 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2041 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2042 /*isVarArg=*/false); 2043 llvm::Type *TypeParams[] = { 2044 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2045 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2046 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2047 auto *FnTy = 2048 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_reduce_nowait: { 2053 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2054 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2055 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2056 // *lck); 2057 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2058 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2059 /*isVarArg=*/false); 2060 llvm::Type *TypeParams[] = { 2061 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2062 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2063 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2064 auto *FnTy = 2065 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2066 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2067 break; 2068 } 2069 case OMPRTL__kmpc_end_reduce: { 2070 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2071 // kmp_critical_name *lck); 2072 llvm::Type *TypeParams[] = { 2073 getIdentTyPointerTy(), CGM.Int32Ty, 2074 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2075 auto *FnTy = 2076 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2077 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2078 break; 2079 } 2080 case OMPRTL__kmpc_end_reduce_nowait: { 2081 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2082 // kmp_critical_name *lck); 2083 llvm::Type *TypeParams[] = { 2084 getIdentTyPointerTy(), CGM.Int32Ty, 2085 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2086 auto *FnTy = 2087 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2088 RTLFn = 2089 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_omp_task_begin_if0: { 2093 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2094 // *new_task); 2095 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2096 CGM.VoidPtrTy}; 2097 auto *FnTy = 2098 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2099 RTLFn = 2100 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2101 break; 2102 } 2103 case OMPRTL__kmpc_omp_task_complete_if0: { 2104 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2105 // *new_task); 2106 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2107 CGM.VoidPtrTy}; 2108 auto *FnTy = 2109 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2110 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2111 /*Name=*/"__kmpc_omp_task_complete_if0"); 2112 break; 2113 } 2114 case OMPRTL__kmpc_ordered: { 2115 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2116 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2117 auto *FnTy = 2118 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2119 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2120 break; 2121 } 2122 case OMPRTL__kmpc_end_ordered: { 2123 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2124 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2125 auto *FnTy = 2126 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2127 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2128 break; 2129 } 2130 case OMPRTL__kmpc_omp_taskwait: { 2131 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_taskgroup: { 2139 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2140 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2141 auto *FnTy = 2142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2144 break; 2145 } 2146 case OMPRTL__kmpc_end_taskgroup: { 2147 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2148 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2149 auto *FnTy = 2150 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2151 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_push_proc_bind: { 2155 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2156 // int proc_bind) 2157 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2158 auto *FnTy = 2159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2160 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2161 break; 2162 } 2163 case OMPRTL__kmpc_omp_task_with_deps: { 2164 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2165 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2166 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2167 llvm::Type *TypeParams[] = { 2168 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2169 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2170 auto *FnTy = 2171 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2172 RTLFn = 2173 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2174 break; 2175 } 2176 case OMPRTL__kmpc_omp_wait_deps: { 2177 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2178 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2179 // kmp_depend_info_t *noalias_dep_list); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2181 CGM.Int32Ty, CGM.VoidPtrTy, 2182 CGM.Int32Ty, CGM.VoidPtrTy}; 2183 auto *FnTy = 2184 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2185 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2186 break; 2187 } 2188 case OMPRTL__kmpc_cancellationpoint: { 2189 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2190 // global_tid, kmp_int32 cncl_kind) 2191 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2192 auto *FnTy = 2193 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2194 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2195 break; 2196 } 2197 case OMPRTL__kmpc_cancel: { 2198 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2199 // kmp_int32 cncl_kind) 2200 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2201 auto *FnTy = 2202 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2204 break; 2205 } 2206 case OMPRTL__kmpc_push_num_teams: { 2207 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2208 // kmp_int32 num_teams, kmp_int32 num_threads) 2209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2210 CGM.Int32Ty}; 2211 auto *FnTy = 2212 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2213 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_fork_teams: { 2217 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2218 // microtask, ...); 2219 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2220 getKmpc_MicroPointerTy()}; 2221 auto *FnTy = 2222 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2223 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2224 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2225 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2226 llvm::LLVMContext &Ctx = F->getContext(); 2227 llvm::MDBuilder MDB(Ctx); 2228 // Annotate the callback behavior of the __kmpc_fork_teams: 2229 // - The callback callee is argument number 2 (microtask). 2230 // - The first two arguments of the callback callee are unknown (-1). 2231 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2232 // callback callee. 2233 F->addMetadata( 2234 llvm::LLVMContext::MD_callback, 2235 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2236 2, {-1, -1}, 2237 /* VarArgsArePassed */ true)})); 2238 } 2239 } 2240 break; 2241 } 2242 case OMPRTL__kmpc_taskloop: { 2243 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2244 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2245 // sched, kmp_uint64 grainsize, void *task_dup); 2246 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2247 CGM.IntTy, 2248 CGM.VoidPtrTy, 2249 CGM.IntTy, 2250 CGM.Int64Ty->getPointerTo(), 2251 CGM.Int64Ty->getPointerTo(), 2252 CGM.Int64Ty, 2253 CGM.IntTy, 2254 CGM.IntTy, 2255 CGM.Int64Ty, 2256 CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2260 break; 2261 } 2262 case OMPRTL__kmpc_doacross_init: { 2263 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2264 // num_dims, struct kmp_dim *dims); 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2266 CGM.Int32Ty, 2267 CGM.Int32Ty, 2268 CGM.VoidPtrTy}; 2269 auto *FnTy = 2270 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2271 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2272 break; 2273 } 2274 case OMPRTL__kmpc_doacross_fini: { 2275 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2276 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2277 auto *FnTy = 2278 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2280 break; 2281 } 2282 case OMPRTL__kmpc_doacross_post: { 2283 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2284 // *vec); 2285 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2286 CGM.Int64Ty->getPointerTo()}; 2287 auto *FnTy = 2288 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2289 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2290 break; 2291 } 2292 case OMPRTL__kmpc_doacross_wait: { 2293 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2294 // *vec); 2295 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2296 CGM.Int64Ty->getPointerTo()}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2300 break; 2301 } 2302 case OMPRTL__kmpc_task_reduction_init: { 2303 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2304 // *data); 2305 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2306 auto *FnTy = 2307 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2308 RTLFn = 2309 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2310 break; 2311 } 2312 case OMPRTL__kmpc_task_reduction_get_th_data: { 2313 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2314 // *d); 2315 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2316 auto *FnTy = 2317 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2318 RTLFn = CGM.CreateRuntimeFunction( 2319 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2320 break; 2321 } 2322 case OMPRTL__kmpc_alloc: { 2323 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2324 // al); omp_allocator_handle_t type is void *. 2325 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2326 auto *FnTy = 2327 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2328 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2329 break; 2330 } 2331 case OMPRTL__kmpc_free: { 2332 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2333 // al); omp_allocator_handle_t type is void *. 2334 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2338 break; 2339 } 2340 case OMPRTL__kmpc_push_target_tripcount: { 2341 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2342 // size); 2343 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2344 llvm::FunctionType *FnTy = 2345 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2346 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2347 break; 2348 } 2349 case OMPRTL__tgt_target: { 2350 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2351 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2352 // *arg_types); 2353 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2354 CGM.VoidPtrTy, 2355 CGM.Int32Ty, 2356 CGM.VoidPtrPtrTy, 2357 CGM.VoidPtrPtrTy, 2358 CGM.Int64Ty->getPointerTo(), 2359 CGM.Int64Ty->getPointerTo()}; 2360 auto *FnTy = 2361 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2362 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2363 break; 2364 } 2365 case OMPRTL__tgt_target_nowait: { 2366 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2367 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2368 // int64_t *arg_types); 2369 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2370 CGM.VoidPtrTy, 2371 CGM.Int32Ty, 2372 CGM.VoidPtrPtrTy, 2373 CGM.VoidPtrPtrTy, 2374 CGM.Int64Ty->getPointerTo(), 2375 CGM.Int64Ty->getPointerTo()}; 2376 auto *FnTy = 2377 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2379 break; 2380 } 2381 case OMPRTL__tgt_target_teams: { 2382 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2383 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2384 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2385 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2386 CGM.VoidPtrTy, 2387 CGM.Int32Ty, 2388 CGM.VoidPtrPtrTy, 2389 CGM.VoidPtrPtrTy, 2390 CGM.Int64Ty->getPointerTo(), 2391 CGM.Int64Ty->getPointerTo(), 2392 CGM.Int32Ty, 2393 CGM.Int32Ty}; 2394 auto *FnTy = 2395 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2396 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2397 break; 2398 } 2399 case OMPRTL__tgt_target_teams_nowait: { 2400 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2401 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2402 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2403 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2404 CGM.VoidPtrTy, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo(), 2410 CGM.Int32Ty, 2411 CGM.Int32Ty}; 2412 auto *FnTy = 2413 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2415 break; 2416 } 2417 case OMPRTL__tgt_register_requires: { 2418 // Build void __tgt_register_requires(int64_t flags); 2419 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2420 auto *FnTy = 2421 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2422 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2423 break; 2424 } 2425 case OMPRTL__tgt_register_lib: { 2426 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2427 QualType ParamTy = 2428 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2429 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2432 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2433 break; 2434 } 2435 case OMPRTL__tgt_unregister_lib: { 2436 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2437 QualType ParamTy = 2438 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2439 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2440 auto *FnTy = 2441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2443 break; 2444 } 2445 case OMPRTL__tgt_target_data_begin: { 2446 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2447 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2448 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2449 CGM.Int32Ty, 2450 CGM.VoidPtrPtrTy, 2451 CGM.VoidPtrPtrTy, 2452 CGM.Int64Ty->getPointerTo(), 2453 CGM.Int64Ty->getPointerTo()}; 2454 auto *FnTy = 2455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2456 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2457 break; 2458 } 2459 case OMPRTL__tgt_target_data_begin_nowait: { 2460 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2461 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2462 // *arg_types); 2463 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2464 CGM.Int32Ty, 2465 CGM.VoidPtrPtrTy, 2466 CGM.VoidPtrPtrTy, 2467 CGM.Int64Ty->getPointerTo(), 2468 CGM.Int64Ty->getPointerTo()}; 2469 auto *FnTy = 2470 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2471 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2472 break; 2473 } 2474 case OMPRTL__tgt_target_data_end: { 2475 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2476 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2477 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2478 CGM.Int32Ty, 2479 CGM.VoidPtrPtrTy, 2480 CGM.VoidPtrPtrTy, 2481 CGM.Int64Ty->getPointerTo(), 2482 CGM.Int64Ty->getPointerTo()}; 2483 auto *FnTy = 2484 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2485 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2486 break; 2487 } 2488 case OMPRTL__tgt_target_data_end_nowait: { 2489 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2490 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2491 // *arg_types); 2492 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2493 CGM.Int32Ty, 2494 CGM.VoidPtrPtrTy, 2495 CGM.VoidPtrPtrTy, 2496 CGM.Int64Ty->getPointerTo(), 2497 CGM.Int64Ty->getPointerTo()}; 2498 auto *FnTy = 2499 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2500 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2501 break; 2502 } 2503 case OMPRTL__tgt_target_data_update: { 2504 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2505 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2506 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2507 CGM.Int32Ty, 2508 CGM.VoidPtrPtrTy, 2509 CGM.VoidPtrPtrTy, 2510 CGM.Int64Ty->getPointerTo(), 2511 CGM.Int64Ty->getPointerTo()}; 2512 auto *FnTy = 2513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2514 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2515 break; 2516 } 2517 case OMPRTL__tgt_target_data_update_nowait: { 2518 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2519 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2520 // *arg_types); 2521 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2522 CGM.Int32Ty, 2523 CGM.VoidPtrPtrTy, 2524 CGM.VoidPtrPtrTy, 2525 CGM.Int64Ty->getPointerTo(), 2526 CGM.Int64Ty->getPointerTo()}; 2527 auto *FnTy = 2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2529 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2530 break; 2531 } 2532 case OMPRTL__tgt_mapper_num_components: { 2533 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2534 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2535 auto *FnTy = 2536 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2537 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2538 break; 2539 } 2540 case OMPRTL__tgt_push_mapper_component: { 2541 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2542 // *base, void *begin, int64_t size, int64_t type); 2543 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2544 CGM.Int64Ty, CGM.Int64Ty}; 2545 auto *FnTy = 2546 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2547 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2548 break; 2549 } 2550 } 2551 assert(RTLFn && "Unable to find OpenMP runtime function"); 2552 return RTLFn; 2553 } 2554 2555 llvm::FunctionCallee 2556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2557 assert((IVSize == 32 || IVSize == 64) && 2558 "IV size is not compatible with the omp runtime"); 2559 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2560 : "__kmpc_for_static_init_4u") 2561 : (IVSigned ? "__kmpc_for_static_init_8" 2562 : "__kmpc_for_static_init_8u"); 2563 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2564 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2565 llvm::Type *TypeParams[] = { 2566 getIdentTyPointerTy(), // loc 2567 CGM.Int32Ty, // tid 2568 CGM.Int32Ty, // schedtype 2569 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2570 PtrTy, // p_lower 2571 PtrTy, // p_upper 2572 PtrTy, // p_stride 2573 ITy, // incr 2574 ITy // chunk 2575 }; 2576 auto *FnTy = 2577 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2578 return CGM.CreateRuntimeFunction(FnTy, Name); 2579 } 2580 2581 llvm::FunctionCallee 2582 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2583 assert((IVSize == 32 || IVSize == 64) && 2584 "IV size is not compatible with the omp runtime"); 2585 StringRef Name = 2586 IVSize == 32 2587 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2588 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2589 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2590 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2591 CGM.Int32Ty, // tid 2592 CGM.Int32Ty, // schedtype 2593 ITy, // lower 2594 ITy, // upper 2595 ITy, // stride 2596 ITy // chunk 2597 }; 2598 auto *FnTy = 2599 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2600 return CGM.CreateRuntimeFunction(FnTy, Name); 2601 } 2602 2603 llvm::FunctionCallee 2604 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2605 assert((IVSize == 32 || IVSize == 64) && 2606 "IV size is not compatible with the omp runtime"); 2607 StringRef Name = 2608 IVSize == 32 2609 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2610 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2611 llvm::Type *TypeParams[] = { 2612 getIdentTyPointerTy(), // loc 2613 CGM.Int32Ty, // tid 2614 }; 2615 auto *FnTy = 2616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2617 return CGM.CreateRuntimeFunction(FnTy, Name); 2618 } 2619 2620 llvm::FunctionCallee 2621 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2622 assert((IVSize == 32 || IVSize == 64) && 2623 "IV size is not compatible with the omp runtime"); 2624 StringRef Name = 2625 IVSize == 32 2626 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2627 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2628 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2629 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2630 llvm::Type *TypeParams[] = { 2631 getIdentTyPointerTy(), // loc 2632 CGM.Int32Ty, // tid 2633 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2634 PtrTy, // p_lower 2635 PtrTy, // p_upper 2636 PtrTy // p_stride 2637 }; 2638 auto *FnTy = 2639 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2640 return CGM.CreateRuntimeFunction(FnTy, Name); 2641 } 2642 2643 /// Obtain information that uniquely identifies a target entry. This 2644 /// consists of the file and device IDs as well as line number associated with 2645 /// the relevant entry source location. 2646 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2647 unsigned &DeviceID, unsigned &FileID, 2648 unsigned &LineNum) { 2649 SourceManager &SM = C.getSourceManager(); 2650 2651 // The loc should be always valid and have a file ID (the user cannot use 2652 // #pragma directives in macros) 2653 2654 assert(Loc.isValid() && "Source location is expected to be always valid."); 2655 2656 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2657 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2658 2659 llvm::sys::fs::UniqueID ID; 2660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2662 << PLoc.getFilename() << EC.message(); 2663 2664 DeviceID = ID.getDevice(); 2665 FileID = ID.getFile(); 2666 LineNum = PLoc.getLine(); 2667 } 2668 2669 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2670 if (CGM.getLangOpts().OpenMPSimd) 2671 return Address::invalid(); 2672 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2673 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2674 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2675 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2676 HasRequiresUnifiedSharedMemory))) { 2677 SmallString<64> PtrName; 2678 { 2679 llvm::raw_svector_ostream OS(PtrName); 2680 OS << CGM.getMangledName(GlobalDecl(VD)); 2681 if (!VD->isExternallyVisible()) { 2682 unsigned DeviceID, FileID, Line; 2683 getTargetEntryUniqueInfo(CGM.getContext(), 2684 VD->getCanonicalDecl()->getBeginLoc(), 2685 DeviceID, FileID, Line); 2686 OS << llvm::format("_%x", FileID); 2687 } 2688 OS << "_decl_tgt_ref_ptr"; 2689 } 2690 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2691 if (!Ptr) { 2692 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2693 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2694 PtrName); 2695 2696 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2697 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2698 2699 if (!CGM.getLangOpts().OpenMPIsDevice) 2700 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2701 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2702 } 2703 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2704 } 2705 return Address::invalid(); 2706 } 2707 2708 llvm::Constant * 2709 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2710 assert(!CGM.getLangOpts().OpenMPUseTLS || 2711 !CGM.getContext().getTargetInfo().isTLSSupported()); 2712 // Lookup the entry, lazily creating it if necessary. 2713 std::string Suffix = getName({"cache", ""}); 2714 return getOrCreateInternalVariable( 2715 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2716 } 2717 2718 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2719 const VarDecl *VD, 2720 Address VDAddr, 2721 SourceLocation Loc) { 2722 if (CGM.getLangOpts().OpenMPUseTLS && 2723 CGM.getContext().getTargetInfo().isTLSSupported()) 2724 return VDAddr; 2725 2726 llvm::Type *VarTy = VDAddr.getElementType(); 2727 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2728 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2729 CGM.Int8PtrTy), 2730 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2731 getOrCreateThreadPrivateCache(VD)}; 2732 return Address(CGF.EmitRuntimeCall( 2733 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2734 VDAddr.getAlignment()); 2735 } 2736 2737 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2738 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2739 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2740 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2741 // library. 2742 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2743 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2744 OMPLoc); 2745 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2746 // to register constructor/destructor for variable. 2747 llvm::Value *Args[] = { 2748 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2749 Ctor, CopyCtor, Dtor}; 2750 CGF.EmitRuntimeCall( 2751 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2752 } 2753 2754 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2755 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2756 bool PerformInit, CodeGenFunction *CGF) { 2757 if (CGM.getLangOpts().OpenMPUseTLS && 2758 CGM.getContext().getTargetInfo().isTLSSupported()) 2759 return nullptr; 2760 2761 VD = VD->getDefinition(CGM.getContext()); 2762 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2763 QualType ASTTy = VD->getType(); 2764 2765 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2766 const Expr *Init = VD->getAnyInitializer(); 2767 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2768 // Generate function that re-emits the declaration's initializer into the 2769 // threadprivate copy of the variable VD 2770 CodeGenFunction CtorCGF(CGM); 2771 FunctionArgList Args; 2772 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2773 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2774 ImplicitParamDecl::Other); 2775 Args.push_back(&Dst); 2776 2777 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2778 CGM.getContext().VoidPtrTy, Args); 2779 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2780 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2781 llvm::Function *Fn = 2782 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2783 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2784 Args, Loc, Loc); 2785 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2786 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2787 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2788 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2789 Arg = CtorCGF.Builder.CreateElementBitCast( 2790 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2791 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2792 /*IsInitializer=*/true); 2793 ArgVal = CtorCGF.EmitLoadOfScalar( 2794 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2795 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2796 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2797 CtorCGF.FinishFunction(); 2798 Ctor = Fn; 2799 } 2800 if (VD->getType().isDestructedType() != QualType::DK_none) { 2801 // Generate function that emits destructor call for the threadprivate copy 2802 // of the variable VD 2803 CodeGenFunction DtorCGF(CGM); 2804 FunctionArgList Args; 2805 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2806 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2807 ImplicitParamDecl::Other); 2808 Args.push_back(&Dst); 2809 2810 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2811 CGM.getContext().VoidTy, Args); 2812 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2813 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2814 llvm::Function *Fn = 2815 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2816 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2817 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2818 Loc, Loc); 2819 // Create a scope with an artificial location for the body of this function. 2820 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2821 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2822 DtorCGF.GetAddrOfLocalVar(&Dst), 2823 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2824 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2825 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2826 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2827 DtorCGF.FinishFunction(); 2828 Dtor = Fn; 2829 } 2830 // Do not emit init function if it is not required. 2831 if (!Ctor && !Dtor) 2832 return nullptr; 2833 2834 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2835 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2836 /*isVarArg=*/false) 2837 ->getPointerTo(); 2838 // Copying constructor for the threadprivate variable. 2839 // Must be NULL - reserved by runtime, but currently it requires that this 2840 // parameter is always NULL. Otherwise it fires assertion. 2841 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2842 if (Ctor == nullptr) { 2843 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2844 /*isVarArg=*/false) 2845 ->getPointerTo(); 2846 Ctor = llvm::Constant::getNullValue(CtorTy); 2847 } 2848 if (Dtor == nullptr) { 2849 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2850 /*isVarArg=*/false) 2851 ->getPointerTo(); 2852 Dtor = llvm::Constant::getNullValue(DtorTy); 2853 } 2854 if (!CGF) { 2855 auto *InitFunctionTy = 2856 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2857 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2858 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2859 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2860 CodeGenFunction InitCGF(CGM); 2861 FunctionArgList ArgList; 2862 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2863 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2864 Loc, Loc); 2865 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2866 InitCGF.FinishFunction(); 2867 return InitFunction; 2868 } 2869 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2870 } 2871 return nullptr; 2872 } 2873 2874 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2875 llvm::GlobalVariable *Addr, 2876 bool PerformInit) { 2877 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2878 !CGM.getLangOpts().OpenMPIsDevice) 2879 return false; 2880 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2881 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2882 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2884 HasRequiresUnifiedSharedMemory)) 2885 return CGM.getLangOpts().OpenMPIsDevice; 2886 VD = VD->getDefinition(CGM.getContext()); 2887 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2888 return CGM.getLangOpts().OpenMPIsDevice; 2889 2890 QualType ASTTy = VD->getType(); 2891 2892 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2893 // Produce the unique prefix to identify the new target regions. We use 2894 // the source location of the variable declaration which we know to not 2895 // conflict with any target region. 2896 unsigned DeviceID; 2897 unsigned FileID; 2898 unsigned Line; 2899 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2900 SmallString<128> Buffer, Out; 2901 { 2902 llvm::raw_svector_ostream OS(Buffer); 2903 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2904 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2905 } 2906 2907 const Expr *Init = VD->getAnyInitializer(); 2908 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2909 llvm::Constant *Ctor; 2910 llvm::Constant *ID; 2911 if (CGM.getLangOpts().OpenMPIsDevice) { 2912 // Generate function that re-emits the declaration's initializer into 2913 // the threadprivate copy of the variable VD 2914 CodeGenFunction CtorCGF(CGM); 2915 2916 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2917 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2918 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2919 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2920 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2921 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2922 FunctionArgList(), Loc, Loc); 2923 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2924 CtorCGF.EmitAnyExprToMem(Init, 2925 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2926 Init->getType().getQualifiers(), 2927 /*IsInitializer=*/true); 2928 CtorCGF.FinishFunction(); 2929 Ctor = Fn; 2930 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2931 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2932 } else { 2933 Ctor = new llvm::GlobalVariable( 2934 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2935 llvm::GlobalValue::PrivateLinkage, 2936 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2937 ID = Ctor; 2938 } 2939 2940 // Register the information for the entry associated with the constructor. 2941 Out.clear(); 2942 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2943 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2944 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2945 } 2946 if (VD->getType().isDestructedType() != QualType::DK_none) { 2947 llvm::Constant *Dtor; 2948 llvm::Constant *ID; 2949 if (CGM.getLangOpts().OpenMPIsDevice) { 2950 // Generate function that emits destructor call for the threadprivate 2951 // copy of the variable VD 2952 CodeGenFunction DtorCGF(CGM); 2953 2954 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2955 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2956 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2957 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2958 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2959 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2960 FunctionArgList(), Loc, Loc); 2961 // Create a scope with an artificial location for the body of this 2962 // function. 2963 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2964 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2965 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2966 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2967 DtorCGF.FinishFunction(); 2968 Dtor = Fn; 2969 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2970 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2971 } else { 2972 Dtor = new llvm::GlobalVariable( 2973 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2974 llvm::GlobalValue::PrivateLinkage, 2975 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2976 ID = Dtor; 2977 } 2978 // Register the information for the entry associated with the destructor. 2979 Out.clear(); 2980 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2981 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2982 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2983 } 2984 return CGM.getLangOpts().OpenMPIsDevice; 2985 } 2986 2987 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2988 QualType VarType, 2989 StringRef Name) { 2990 std::string Suffix = getName({"artificial", ""}); 2991 std::string CacheSuffix = getName({"cache", ""}); 2992 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2993 llvm::Value *GAddr = 2994 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2995 llvm::Value *Args[] = { 2996 emitUpdateLocation(CGF, SourceLocation()), 2997 getThreadID(CGF, SourceLocation()), 2998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2999 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3000 /*isSigned=*/false), 3001 getOrCreateInternalVariable( 3002 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3003 return Address( 3004 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3005 CGF.EmitRuntimeCall( 3006 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3007 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3008 CGM.getPointerAlign()); 3009 } 3010 3011 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 3012 const RegionCodeGenTy &ThenGen, 3013 const RegionCodeGenTy &ElseGen) { 3014 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3015 3016 // If the condition constant folds and can be elided, try to avoid emitting 3017 // the condition and the dead arm of the if/else. 3018 bool CondConstant; 3019 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3020 if (CondConstant) 3021 ThenGen(CGF); 3022 else 3023 ElseGen(CGF); 3024 return; 3025 } 3026 3027 // Otherwise, the condition did not fold, or we couldn't elide it. Just 3028 // emit the conditional branch. 3029 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3030 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3031 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3032 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3033 3034 // Emit the 'then' code. 3035 CGF.EmitBlock(ThenBlock); 3036 ThenGen(CGF); 3037 CGF.EmitBranch(ContBlock); 3038 // Emit the 'else' code if present. 3039 // There is no need to emit line number for unconditional branch. 3040 (void)ApplyDebugLocation::CreateEmpty(CGF); 3041 CGF.EmitBlock(ElseBlock); 3042 ElseGen(CGF); 3043 // There is no need to emit line number for unconditional branch. 3044 (void)ApplyDebugLocation::CreateEmpty(CGF); 3045 CGF.EmitBranch(ContBlock); 3046 // Emit the continuation block for code after the if. 3047 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3048 } 3049 3050 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3051 llvm::Function *OutlinedFn, 3052 ArrayRef<llvm::Value *> CapturedVars, 3053 const Expr *IfCond) { 3054 if (!CGF.HaveInsertPoint()) 3055 return; 3056 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3057 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3058 PrePostActionTy &) { 3059 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3060 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3061 llvm::Value *Args[] = { 3062 RTLoc, 3063 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3064 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3065 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3066 RealArgs.append(std::begin(Args), std::end(Args)); 3067 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3068 3069 llvm::FunctionCallee RTLFn = 3070 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3071 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3072 }; 3073 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3074 PrePostActionTy &) { 3075 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3076 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3077 // Build calls: 3078 // __kmpc_serialized_parallel(&Loc, GTid); 3079 llvm::Value *Args[] = {RTLoc, ThreadID}; 3080 CGF.EmitRuntimeCall( 3081 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3082 3083 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3084 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3085 Address ZeroAddrBound = 3086 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3087 /*Name=*/".bound.zero.addr"); 3088 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3089 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3090 // ThreadId for serialized parallels is 0. 3091 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3092 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3093 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3094 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3095 3096 // __kmpc_end_serialized_parallel(&Loc, GTid); 3097 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3098 CGF.EmitRuntimeCall( 3099 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3100 EndArgs); 3101 }; 3102 if (IfCond) { 3103 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3104 } else { 3105 RegionCodeGenTy ThenRCG(ThenGen); 3106 ThenRCG(CGF); 3107 } 3108 } 3109 3110 // If we're inside an (outlined) parallel region, use the region info's 3111 // thread-ID variable (it is passed in a first argument of the outlined function 3112 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3113 // regular serial code region, get thread ID by calling kmp_int32 3114 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3115 // return the address of that temp. 3116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3117 SourceLocation Loc) { 3118 if (auto *OMPRegionInfo = 3119 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3120 if (OMPRegionInfo->getThreadIDVariable()) 3121 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3122 3123 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3124 QualType Int32Ty = 3125 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3126 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3127 CGF.EmitStoreOfScalar(ThreadID, 3128 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3129 3130 return ThreadIDTemp; 3131 } 3132 3133 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3134 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3135 SmallString<256> Buffer; 3136 llvm::raw_svector_ostream Out(Buffer); 3137 Out << Name; 3138 StringRef RuntimeName = Out.str(); 3139 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3140 if (Elem.second) { 3141 assert(Elem.second->getType()->getPointerElementType() == Ty && 3142 "OMP internal variable has different type than requested"); 3143 return &*Elem.second; 3144 } 3145 3146 return Elem.second = new llvm::GlobalVariable( 3147 CGM.getModule(), Ty, /*IsConstant*/ false, 3148 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3149 Elem.first(), /*InsertBefore=*/nullptr, 3150 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3151 } 3152 3153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3154 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3155 std::string Name = getName({Prefix, "var"}); 3156 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3157 } 3158 3159 namespace { 3160 /// Common pre(post)-action for different OpenMP constructs. 3161 class CommonActionTy final : public PrePostActionTy { 3162 llvm::FunctionCallee EnterCallee; 3163 ArrayRef<llvm::Value *> EnterArgs; 3164 llvm::FunctionCallee ExitCallee; 3165 ArrayRef<llvm::Value *> ExitArgs; 3166 bool Conditional; 3167 llvm::BasicBlock *ContBlock = nullptr; 3168 3169 public: 3170 CommonActionTy(llvm::FunctionCallee EnterCallee, 3171 ArrayRef<llvm::Value *> EnterArgs, 3172 llvm::FunctionCallee ExitCallee, 3173 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3174 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3175 ExitArgs(ExitArgs), Conditional(Conditional) {} 3176 void Enter(CodeGenFunction &CGF) override { 3177 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3178 if (Conditional) { 3179 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3180 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3181 ContBlock = CGF.createBasicBlock("omp_if.end"); 3182 // Generate the branch (If-stmt) 3183 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3184 CGF.EmitBlock(ThenBlock); 3185 } 3186 } 3187 void Done(CodeGenFunction &CGF) { 3188 // Emit the rest of blocks/branches 3189 CGF.EmitBranch(ContBlock); 3190 CGF.EmitBlock(ContBlock, true); 3191 } 3192 void Exit(CodeGenFunction &CGF) override { 3193 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3194 } 3195 }; 3196 } // anonymous namespace 3197 3198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3199 StringRef CriticalName, 3200 const RegionCodeGenTy &CriticalOpGen, 3201 SourceLocation Loc, const Expr *Hint) { 3202 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3203 // CriticalOpGen(); 3204 // __kmpc_end_critical(ident_t *, gtid, Lock); 3205 // Prepare arguments and build a call to __kmpc_critical 3206 if (!CGF.HaveInsertPoint()) 3207 return; 3208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3209 getCriticalRegionLock(CriticalName)}; 3210 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3211 std::end(Args)); 3212 if (Hint) { 3213 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3214 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3215 } 3216 CommonActionTy Action( 3217 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3218 : OMPRTL__kmpc_critical), 3219 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3220 CriticalOpGen.setAction(Action); 3221 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3222 } 3223 3224 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3225 const RegionCodeGenTy &MasterOpGen, 3226 SourceLocation Loc) { 3227 if (!CGF.HaveInsertPoint()) 3228 return; 3229 // if(__kmpc_master(ident_t *, gtid)) { 3230 // MasterOpGen(); 3231 // __kmpc_end_master(ident_t *, gtid); 3232 // } 3233 // Prepare arguments and build a call to __kmpc_master 3234 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3235 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3236 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3237 /*Conditional=*/true); 3238 MasterOpGen.setAction(Action); 3239 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3240 Action.Done(CGF); 3241 } 3242 3243 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3244 SourceLocation Loc) { 3245 if (!CGF.HaveInsertPoint()) 3246 return; 3247 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3248 llvm::Value *Args[] = { 3249 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3250 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3251 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3252 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3253 Region->emitUntiedSwitch(CGF); 3254 } 3255 3256 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3257 const RegionCodeGenTy &TaskgroupOpGen, 3258 SourceLocation Loc) { 3259 if (!CGF.HaveInsertPoint()) 3260 return; 3261 // __kmpc_taskgroup(ident_t *, gtid); 3262 // TaskgroupOpGen(); 3263 // __kmpc_end_taskgroup(ident_t *, gtid); 3264 // Prepare arguments and build a call to __kmpc_taskgroup 3265 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3266 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3267 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3268 Args); 3269 TaskgroupOpGen.setAction(Action); 3270 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3271 } 3272 3273 /// Given an array of pointers to variables, project the address of a 3274 /// given variable. 3275 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3276 unsigned Index, const VarDecl *Var) { 3277 // Pull out the pointer to the variable. 3278 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3279 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3280 3281 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3282 Addr = CGF.Builder.CreateElementBitCast( 3283 Addr, CGF.ConvertTypeForMem(Var->getType())); 3284 return Addr; 3285 } 3286 3287 static llvm::Value *emitCopyprivateCopyFunction( 3288 CodeGenModule &CGM, llvm::Type *ArgsType, 3289 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3290 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3291 SourceLocation Loc) { 3292 ASTContext &C = CGM.getContext(); 3293 // void copy_func(void *LHSArg, void *RHSArg); 3294 FunctionArgList Args; 3295 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3296 ImplicitParamDecl::Other); 3297 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3298 ImplicitParamDecl::Other); 3299 Args.push_back(&LHSArg); 3300 Args.push_back(&RHSArg); 3301 const auto &CGFI = 3302 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3303 std::string Name = 3304 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3305 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3306 llvm::GlobalValue::InternalLinkage, Name, 3307 &CGM.getModule()); 3308 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3309 Fn->setDoesNotRecurse(); 3310 CodeGenFunction CGF(CGM); 3311 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3312 // Dest = (void*[n])(LHSArg); 3313 // Src = (void*[n])(RHSArg); 3314 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3315 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3316 ArgsType), CGF.getPointerAlign()); 3317 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3318 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3319 ArgsType), CGF.getPointerAlign()); 3320 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3321 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3322 // ... 3323 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3324 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3325 const auto *DestVar = 3326 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3327 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3328 3329 const auto *SrcVar = 3330 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3331 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3332 3333 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3334 QualType Type = VD->getType(); 3335 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3336 } 3337 CGF.FinishFunction(); 3338 return Fn; 3339 } 3340 3341 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3342 const RegionCodeGenTy &SingleOpGen, 3343 SourceLocation Loc, 3344 ArrayRef<const Expr *> CopyprivateVars, 3345 ArrayRef<const Expr *> SrcExprs, 3346 ArrayRef<const Expr *> DstExprs, 3347 ArrayRef<const Expr *> AssignmentOps) { 3348 if (!CGF.HaveInsertPoint()) 3349 return; 3350 assert(CopyprivateVars.size() == SrcExprs.size() && 3351 CopyprivateVars.size() == DstExprs.size() && 3352 CopyprivateVars.size() == AssignmentOps.size()); 3353 ASTContext &C = CGM.getContext(); 3354 // int32 did_it = 0; 3355 // if(__kmpc_single(ident_t *, gtid)) { 3356 // SingleOpGen(); 3357 // __kmpc_end_single(ident_t *, gtid); 3358 // did_it = 1; 3359 // } 3360 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3361 // <copy_func>, did_it); 3362 3363 Address DidIt = Address::invalid(); 3364 if (!CopyprivateVars.empty()) { 3365 // int32 did_it = 0; 3366 QualType KmpInt32Ty = 3367 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3368 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3369 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3370 } 3371 // Prepare arguments and build a call to __kmpc_single 3372 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3373 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3374 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3375 /*Conditional=*/true); 3376 SingleOpGen.setAction(Action); 3377 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3378 if (DidIt.isValid()) { 3379 // did_it = 1; 3380 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3381 } 3382 Action.Done(CGF); 3383 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3384 // <copy_func>, did_it); 3385 if (DidIt.isValid()) { 3386 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3387 QualType CopyprivateArrayTy = C.getConstantArrayType( 3388 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3389 /*IndexTypeQuals=*/0); 3390 // Create a list of all private variables for copyprivate. 3391 Address CopyprivateList = 3392 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3393 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3394 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3395 CGF.Builder.CreateStore( 3396 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3397 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3398 Elem); 3399 } 3400 // Build function that copies private values from single region to all other 3401 // threads in the corresponding parallel region. 3402 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3403 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3404 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3405 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3406 Address CL = 3407 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3408 CGF.VoidPtrTy); 3409 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3410 llvm::Value *Args[] = { 3411 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3412 getThreadID(CGF, Loc), // i32 <gtid> 3413 BufSize, // size_t <buf_size> 3414 CL.getPointer(), // void *<copyprivate list> 3415 CpyFn, // void (*) (void *, void *) <copy_func> 3416 DidItVal // i32 did_it 3417 }; 3418 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3419 } 3420 } 3421 3422 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3423 const RegionCodeGenTy &OrderedOpGen, 3424 SourceLocation Loc, bool IsThreads) { 3425 if (!CGF.HaveInsertPoint()) 3426 return; 3427 // __kmpc_ordered(ident_t *, gtid); 3428 // OrderedOpGen(); 3429 // __kmpc_end_ordered(ident_t *, gtid); 3430 // Prepare arguments and build a call to __kmpc_ordered 3431 if (IsThreads) { 3432 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3433 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3434 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3435 Args); 3436 OrderedOpGen.setAction(Action); 3437 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3438 return; 3439 } 3440 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3441 } 3442 3443 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3444 unsigned Flags; 3445 if (Kind == OMPD_for) 3446 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3447 else if (Kind == OMPD_sections) 3448 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3449 else if (Kind == OMPD_single) 3450 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3451 else if (Kind == OMPD_barrier) 3452 Flags = OMP_IDENT_BARRIER_EXPL; 3453 else 3454 Flags = OMP_IDENT_BARRIER_IMPL; 3455 return Flags; 3456 } 3457 3458 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3459 CodeGenFunction &CGF, const OMPLoopDirective &S, 3460 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3461 // Check if the loop directive is actually a doacross loop directive. In this 3462 // case choose static, 1 schedule. 3463 if (llvm::any_of( 3464 S.getClausesOfKind<OMPOrderedClause>(), 3465 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3466 ScheduleKind = OMPC_SCHEDULE_static; 3467 // Chunk size is 1 in this case. 3468 llvm::APInt ChunkSize(32, 1); 3469 ChunkExpr = IntegerLiteral::Create( 3470 CGF.getContext(), ChunkSize, 3471 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3472 SourceLocation()); 3473 } 3474 } 3475 3476 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3477 OpenMPDirectiveKind Kind, bool EmitChecks, 3478 bool ForceSimpleCall) { 3479 if (!CGF.HaveInsertPoint()) 3480 return; 3481 // Build call __kmpc_cancel_barrier(loc, thread_id); 3482 // Build call __kmpc_barrier(loc, thread_id); 3483 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3484 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3485 // thread_id); 3486 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3487 getThreadID(CGF, Loc)}; 3488 if (auto *OMPRegionInfo = 3489 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3490 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3491 llvm::Value *Result = CGF.EmitRuntimeCall( 3492 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3493 if (EmitChecks) { 3494 // if (__kmpc_cancel_barrier()) { 3495 // exit from construct; 3496 // } 3497 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3498 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3499 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3500 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3501 CGF.EmitBlock(ExitBB); 3502 // exit from construct; 3503 CodeGenFunction::JumpDest CancelDestination = 3504 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3505 CGF.EmitBranchThroughCleanup(CancelDestination); 3506 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3507 } 3508 return; 3509 } 3510 } 3511 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3512 } 3513 3514 /// Map the OpenMP loop schedule to the runtime enumeration. 3515 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3516 bool Chunked, bool Ordered) { 3517 switch (ScheduleKind) { 3518 case OMPC_SCHEDULE_static: 3519 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3520 : (Ordered ? OMP_ord_static : OMP_sch_static); 3521 case OMPC_SCHEDULE_dynamic: 3522 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3523 case OMPC_SCHEDULE_guided: 3524 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3525 case OMPC_SCHEDULE_runtime: 3526 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3527 case OMPC_SCHEDULE_auto: 3528 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3529 case OMPC_SCHEDULE_unknown: 3530 assert(!Chunked && "chunk was specified but schedule kind not known"); 3531 return Ordered ? OMP_ord_static : OMP_sch_static; 3532 } 3533 llvm_unreachable("Unexpected runtime schedule"); 3534 } 3535 3536 /// Map the OpenMP distribute schedule to the runtime enumeration. 3537 static OpenMPSchedType 3538 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3539 // only static is allowed for dist_schedule 3540 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3541 } 3542 3543 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3544 bool Chunked) const { 3545 OpenMPSchedType Schedule = 3546 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3547 return Schedule == OMP_sch_static; 3548 } 3549 3550 bool CGOpenMPRuntime::isStaticNonchunked( 3551 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3552 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3553 return Schedule == OMP_dist_sch_static; 3554 } 3555 3556 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3557 bool Chunked) const { 3558 OpenMPSchedType Schedule = 3559 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3560 return Schedule == OMP_sch_static_chunked; 3561 } 3562 3563 bool CGOpenMPRuntime::isStaticChunked( 3564 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3565 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3566 return Schedule == OMP_dist_sch_static_chunked; 3567 } 3568 3569 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3570 OpenMPSchedType Schedule = 3571 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3572 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3573 return Schedule != OMP_sch_static; 3574 } 3575 3576 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3577 OpenMPScheduleClauseModifier M1, 3578 OpenMPScheduleClauseModifier M2) { 3579 int Modifier = 0; 3580 switch (M1) { 3581 case OMPC_SCHEDULE_MODIFIER_monotonic: 3582 Modifier = OMP_sch_modifier_monotonic; 3583 break; 3584 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3585 Modifier = OMP_sch_modifier_nonmonotonic; 3586 break; 3587 case OMPC_SCHEDULE_MODIFIER_simd: 3588 if (Schedule == OMP_sch_static_chunked) 3589 Schedule = OMP_sch_static_balanced_chunked; 3590 break; 3591 case OMPC_SCHEDULE_MODIFIER_last: 3592 case OMPC_SCHEDULE_MODIFIER_unknown: 3593 break; 3594 } 3595 switch (M2) { 3596 case OMPC_SCHEDULE_MODIFIER_monotonic: 3597 Modifier = OMP_sch_modifier_monotonic; 3598 break; 3599 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3600 Modifier = OMP_sch_modifier_nonmonotonic; 3601 break; 3602 case OMPC_SCHEDULE_MODIFIER_simd: 3603 if (Schedule == OMP_sch_static_chunked) 3604 Schedule = OMP_sch_static_balanced_chunked; 3605 break; 3606 case OMPC_SCHEDULE_MODIFIER_last: 3607 case OMPC_SCHEDULE_MODIFIER_unknown: 3608 break; 3609 } 3610 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3611 // If the static schedule kind is specified or if the ordered clause is 3612 // specified, and if the nonmonotonic modifier is not specified, the effect is 3613 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3614 // modifier is specified, the effect is as if the nonmonotonic modifier is 3615 // specified. 3616 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3617 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3618 Schedule == OMP_sch_static_balanced_chunked || 3619 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) 3620 Modifier = OMP_sch_modifier_nonmonotonic; 3621 } 3622 return Schedule | Modifier; 3623 } 3624 3625 void CGOpenMPRuntime::emitForDispatchInit( 3626 CodeGenFunction &CGF, SourceLocation Loc, 3627 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3628 bool Ordered, const DispatchRTInput &DispatchValues) { 3629 if (!CGF.HaveInsertPoint()) 3630 return; 3631 OpenMPSchedType Schedule = getRuntimeSchedule( 3632 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3633 assert(Ordered || 3634 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3635 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3636 Schedule != OMP_sch_static_balanced_chunked)); 3637 // Call __kmpc_dispatch_init( 3638 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3639 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3640 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3641 3642 // If the Chunk was not specified in the clause - use default value 1. 3643 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3644 : CGF.Builder.getIntN(IVSize, 1); 3645 llvm::Value *Args[] = { 3646 emitUpdateLocation(CGF, Loc), 3647 getThreadID(CGF, Loc), 3648 CGF.Builder.getInt32(addMonoNonMonoModifier( 3649 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3650 DispatchValues.LB, // Lower 3651 DispatchValues.UB, // Upper 3652 CGF.Builder.getIntN(IVSize, 1), // Stride 3653 Chunk // Chunk 3654 }; 3655 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3656 } 3657 3658 static void emitForStaticInitCall( 3659 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3660 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3661 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3662 const CGOpenMPRuntime::StaticRTInput &Values) { 3663 if (!CGF.HaveInsertPoint()) 3664 return; 3665 3666 assert(!Values.Ordered); 3667 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3668 Schedule == OMP_sch_static_balanced_chunked || 3669 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3670 Schedule == OMP_dist_sch_static || 3671 Schedule == OMP_dist_sch_static_chunked); 3672 3673 // Call __kmpc_for_static_init( 3674 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3675 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3676 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3677 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3678 llvm::Value *Chunk = Values.Chunk; 3679 if (Chunk == nullptr) { 3680 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3681 Schedule == OMP_dist_sch_static) && 3682 "expected static non-chunked schedule"); 3683 // If the Chunk was not specified in the clause - use default value 1. 3684 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3685 } else { 3686 assert((Schedule == OMP_sch_static_chunked || 3687 Schedule == OMP_sch_static_balanced_chunked || 3688 Schedule == OMP_ord_static_chunked || 3689 Schedule == OMP_dist_sch_static_chunked) && 3690 "expected static chunked schedule"); 3691 } 3692 llvm::Value *Args[] = { 3693 UpdateLocation, 3694 ThreadId, 3695 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3696 M2)), // Schedule type 3697 Values.IL.getPointer(), // &isLastIter 3698 Values.LB.getPointer(), // &LB 3699 Values.UB.getPointer(), // &UB 3700 Values.ST.getPointer(), // &Stride 3701 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3702 Chunk // Chunk 3703 }; 3704 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3705 } 3706 3707 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3708 SourceLocation Loc, 3709 OpenMPDirectiveKind DKind, 3710 const OpenMPScheduleTy &ScheduleKind, 3711 const StaticRTInput &Values) { 3712 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3713 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3714 assert(isOpenMPWorksharingDirective(DKind) && 3715 "Expected loop-based or sections-based directive."); 3716 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3717 isOpenMPLoopDirective(DKind) 3718 ? OMP_IDENT_WORK_LOOP 3719 : OMP_IDENT_WORK_SECTIONS); 3720 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3721 llvm::FunctionCallee StaticInitFunction = 3722 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3723 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3724 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3725 } 3726 3727 void CGOpenMPRuntime::emitDistributeStaticInit( 3728 CodeGenFunction &CGF, SourceLocation Loc, 3729 OpenMPDistScheduleClauseKind SchedKind, 3730 const CGOpenMPRuntime::StaticRTInput &Values) { 3731 OpenMPSchedType ScheduleNum = 3732 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3733 llvm::Value *UpdatedLocation = 3734 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3735 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3736 llvm::FunctionCallee StaticInitFunction = 3737 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3738 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3739 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3740 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3741 } 3742 3743 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3744 SourceLocation Loc, 3745 OpenMPDirectiveKind DKind) { 3746 if (!CGF.HaveInsertPoint()) 3747 return; 3748 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3749 llvm::Value *Args[] = { 3750 emitUpdateLocation(CGF, Loc, 3751 isOpenMPDistributeDirective(DKind) 3752 ? OMP_IDENT_WORK_DISTRIBUTE 3753 : isOpenMPLoopDirective(DKind) 3754 ? OMP_IDENT_WORK_LOOP 3755 : OMP_IDENT_WORK_SECTIONS), 3756 getThreadID(CGF, Loc)}; 3757 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3758 Args); 3759 } 3760 3761 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3762 SourceLocation Loc, 3763 unsigned IVSize, 3764 bool IVSigned) { 3765 if (!CGF.HaveInsertPoint()) 3766 return; 3767 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3768 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3769 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3770 } 3771 3772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3773 SourceLocation Loc, unsigned IVSize, 3774 bool IVSigned, Address IL, 3775 Address LB, Address UB, 3776 Address ST) { 3777 // Call __kmpc_dispatch_next( 3778 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3779 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3780 // kmp_int[32|64] *p_stride); 3781 llvm::Value *Args[] = { 3782 emitUpdateLocation(CGF, Loc), 3783 getThreadID(CGF, Loc), 3784 IL.getPointer(), // &isLastIter 3785 LB.getPointer(), // &Lower 3786 UB.getPointer(), // &Upper 3787 ST.getPointer() // &Stride 3788 }; 3789 llvm::Value *Call = 3790 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3791 return CGF.EmitScalarConversion( 3792 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3793 CGF.getContext().BoolTy, Loc); 3794 } 3795 3796 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3797 llvm::Value *NumThreads, 3798 SourceLocation Loc) { 3799 if (!CGF.HaveInsertPoint()) 3800 return; 3801 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3802 llvm::Value *Args[] = { 3803 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3804 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3805 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3806 Args); 3807 } 3808 3809 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3810 OpenMPProcBindClauseKind ProcBind, 3811 SourceLocation Loc) { 3812 if (!CGF.HaveInsertPoint()) 3813 return; 3814 // Constants for proc bind value accepted by the runtime. 3815 enum ProcBindTy { 3816 ProcBindFalse = 0, 3817 ProcBindTrue, 3818 ProcBindMaster, 3819 ProcBindClose, 3820 ProcBindSpread, 3821 ProcBindIntel, 3822 ProcBindDefault 3823 } RuntimeProcBind; 3824 switch (ProcBind) { 3825 case OMPC_PROC_BIND_master: 3826 RuntimeProcBind = ProcBindMaster; 3827 break; 3828 case OMPC_PROC_BIND_close: 3829 RuntimeProcBind = ProcBindClose; 3830 break; 3831 case OMPC_PROC_BIND_spread: 3832 RuntimeProcBind = ProcBindSpread; 3833 break; 3834 case OMPC_PROC_BIND_unknown: 3835 llvm_unreachable("Unsupported proc_bind value."); 3836 } 3837 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3838 llvm::Value *Args[] = { 3839 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3840 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3841 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3842 } 3843 3844 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3845 SourceLocation Loc) { 3846 if (!CGF.HaveInsertPoint()) 3847 return; 3848 // Build call void __kmpc_flush(ident_t *loc) 3849 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3850 emitUpdateLocation(CGF, Loc)); 3851 } 3852 3853 namespace { 3854 /// Indexes of fields for type kmp_task_t. 3855 enum KmpTaskTFields { 3856 /// List of shared variables. 3857 KmpTaskTShareds, 3858 /// Task routine. 3859 KmpTaskTRoutine, 3860 /// Partition id for the untied tasks. 3861 KmpTaskTPartId, 3862 /// Function with call of destructors for private variables. 3863 Data1, 3864 /// Task priority. 3865 Data2, 3866 /// (Taskloops only) Lower bound. 3867 KmpTaskTLowerBound, 3868 /// (Taskloops only) Upper bound. 3869 KmpTaskTUpperBound, 3870 /// (Taskloops only) Stride. 3871 KmpTaskTStride, 3872 /// (Taskloops only) Is last iteration flag. 3873 KmpTaskTLastIter, 3874 /// (Taskloops only) Reduction data. 3875 KmpTaskTReductions, 3876 }; 3877 } // anonymous namespace 3878 3879 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3880 return OffloadEntriesTargetRegion.empty() && 3881 OffloadEntriesDeviceGlobalVar.empty(); 3882 } 3883 3884 /// Initialize target region entry. 3885 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3886 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3887 StringRef ParentName, unsigned LineNum, 3888 unsigned Order) { 3889 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3890 "only required for the device " 3891 "code generation."); 3892 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3893 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3894 OMPTargetRegionEntryTargetRegion); 3895 ++OffloadingEntriesNum; 3896 } 3897 3898 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3899 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3900 StringRef ParentName, unsigned LineNum, 3901 llvm::Constant *Addr, llvm::Constant *ID, 3902 OMPTargetRegionEntryKind Flags) { 3903 // If we are emitting code for a target, the entry is already initialized, 3904 // only has to be registered. 3905 if (CGM.getLangOpts().OpenMPIsDevice) { 3906 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3907 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3908 DiagnosticsEngine::Error, 3909 "Unable to find target region on line '%0' in the device code."); 3910 CGM.getDiags().Report(DiagID) << LineNum; 3911 return; 3912 } 3913 auto &Entry = 3914 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3915 assert(Entry.isValid() && "Entry not initialized!"); 3916 Entry.setAddress(Addr); 3917 Entry.setID(ID); 3918 Entry.setFlags(Flags); 3919 } else { 3920 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3921 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3922 ++OffloadingEntriesNum; 3923 } 3924 } 3925 3926 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3927 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3928 unsigned LineNum) const { 3929 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3930 if (PerDevice == OffloadEntriesTargetRegion.end()) 3931 return false; 3932 auto PerFile = PerDevice->second.find(FileID); 3933 if (PerFile == PerDevice->second.end()) 3934 return false; 3935 auto PerParentName = PerFile->second.find(ParentName); 3936 if (PerParentName == PerFile->second.end()) 3937 return false; 3938 auto PerLine = PerParentName->second.find(LineNum); 3939 if (PerLine == PerParentName->second.end()) 3940 return false; 3941 // Fail if this entry is already registered. 3942 if (PerLine->second.getAddress() || PerLine->second.getID()) 3943 return false; 3944 return true; 3945 } 3946 3947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3948 const OffloadTargetRegionEntryInfoActTy &Action) { 3949 // Scan all target region entries and perform the provided action. 3950 for (const auto &D : OffloadEntriesTargetRegion) 3951 for (const auto &F : D.second) 3952 for (const auto &P : F.second) 3953 for (const auto &L : P.second) 3954 Action(D.first, F.first, P.first(), L.first, L.second); 3955 } 3956 3957 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3958 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3959 OMPTargetGlobalVarEntryKind Flags, 3960 unsigned Order) { 3961 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3962 "only required for the device " 3963 "code generation."); 3964 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3965 ++OffloadingEntriesNum; 3966 } 3967 3968 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3969 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3970 CharUnits VarSize, 3971 OMPTargetGlobalVarEntryKind Flags, 3972 llvm::GlobalValue::LinkageTypes Linkage) { 3973 if (CGM.getLangOpts().OpenMPIsDevice) { 3974 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3975 assert(Entry.isValid() && Entry.getFlags() == Flags && 3976 "Entry not initialized!"); 3977 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3978 "Resetting with the new address."); 3979 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3980 if (Entry.getVarSize().isZero()) { 3981 Entry.setVarSize(VarSize); 3982 Entry.setLinkage(Linkage); 3983 } 3984 return; 3985 } 3986 Entry.setVarSize(VarSize); 3987 Entry.setLinkage(Linkage); 3988 Entry.setAddress(Addr); 3989 } else { 3990 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3991 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3992 assert(Entry.isValid() && Entry.getFlags() == Flags && 3993 "Entry not initialized!"); 3994 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3995 "Resetting with the new address."); 3996 if (Entry.getVarSize().isZero()) { 3997 Entry.setVarSize(VarSize); 3998 Entry.setLinkage(Linkage); 3999 } 4000 return; 4001 } 4002 OffloadEntriesDeviceGlobalVar.try_emplace( 4003 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4004 ++OffloadingEntriesNum; 4005 } 4006 } 4007 4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4009 actOnDeviceGlobalVarEntriesInfo( 4010 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4011 // Scan all target region entries and perform the provided action. 4012 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4013 Action(E.getKey(), E.getValue()); 4014 } 4015 4016 void CGOpenMPRuntime::createOffloadEntry( 4017 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4018 llvm::GlobalValue::LinkageTypes Linkage) { 4019 StringRef Name = Addr->getName(); 4020 llvm::Module &M = CGM.getModule(); 4021 llvm::LLVMContext &C = M.getContext(); 4022 4023 // Create constant string with the name. 4024 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4025 4026 std::string StringName = getName({"omp_offloading", "entry_name"}); 4027 auto *Str = new llvm::GlobalVariable( 4028 M, StrPtrInit->getType(), /*isConstant=*/true, 4029 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4030 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4031 4032 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4033 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4034 llvm::ConstantInt::get(CGM.SizeTy, Size), 4035 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4036 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4037 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4038 llvm::GlobalVariable *Entry = createGlobalStruct( 4039 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4040 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4041 4042 // The entry has to be created in the section the linker expects it to be. 4043 Entry->setSection("omp_offloading_entries"); 4044 } 4045 4046 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4047 // Emit the offloading entries and metadata so that the device codegen side 4048 // can easily figure out what to emit. The produced metadata looks like 4049 // this: 4050 // 4051 // !omp_offload.info = !{!1, ...} 4052 // 4053 // Right now we only generate metadata for function that contain target 4054 // regions. 4055 4056 // If we are in simd mode or there are no entries, we don't need to do 4057 // anything. 4058 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4059 return; 4060 4061 llvm::Module &M = CGM.getModule(); 4062 llvm::LLVMContext &C = M.getContext(); 4063 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4064 SourceLocation, StringRef>, 4065 16> 4066 OrderedEntries(OffloadEntriesInfoManager.size()); 4067 llvm::SmallVector<StringRef, 16> ParentFunctions( 4068 OffloadEntriesInfoManager.size()); 4069 4070 // Auxiliary methods to create metadata values and strings. 4071 auto &&GetMDInt = [this](unsigned V) { 4072 return llvm::ConstantAsMetadata::get( 4073 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4074 }; 4075 4076 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4077 4078 // Create the offloading info metadata node. 4079 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4080 4081 // Create function that emits metadata for each target region entry; 4082 auto &&TargetRegionMetadataEmitter = 4083 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4084 &GetMDString]( 4085 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4086 unsigned Line, 4087 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4088 // Generate metadata for target regions. Each entry of this metadata 4089 // contains: 4090 // - Entry 0 -> Kind of this type of metadata (0). 4091 // - Entry 1 -> Device ID of the file where the entry was identified. 4092 // - Entry 2 -> File ID of the file where the entry was identified. 4093 // - Entry 3 -> Mangled name of the function where the entry was 4094 // identified. 4095 // - Entry 4 -> Line in the file where the entry was identified. 4096 // - Entry 5 -> Order the entry was created. 4097 // The first element of the metadata node is the kind. 4098 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4099 GetMDInt(FileID), GetMDString(ParentName), 4100 GetMDInt(Line), GetMDInt(E.getOrder())}; 4101 4102 SourceLocation Loc; 4103 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4104 E = CGM.getContext().getSourceManager().fileinfo_end(); 4105 I != E; ++I) { 4106 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4107 I->getFirst()->getUniqueID().getFile() == FileID) { 4108 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4109 I->getFirst(), Line, 1); 4110 break; 4111 } 4112 } 4113 // Save this entry in the right position of the ordered entries array. 4114 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4115 ParentFunctions[E.getOrder()] = ParentName; 4116 4117 // Add metadata to the named metadata node. 4118 MD->addOperand(llvm::MDNode::get(C, Ops)); 4119 }; 4120 4121 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4122 TargetRegionMetadataEmitter); 4123 4124 // Create function that emits metadata for each device global variable entry; 4125 auto &&DeviceGlobalVarMetadataEmitter = 4126 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4127 MD](StringRef MangledName, 4128 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4129 &E) { 4130 // Generate metadata for global variables. Each entry of this metadata 4131 // contains: 4132 // - Entry 0 -> Kind of this type of metadata (1). 4133 // - Entry 1 -> Mangled name of the variable. 4134 // - Entry 2 -> Declare target kind. 4135 // - Entry 3 -> Order the entry was created. 4136 // The first element of the metadata node is the kind. 4137 llvm::Metadata *Ops[] = { 4138 GetMDInt(E.getKind()), GetMDString(MangledName), 4139 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4140 4141 // Save this entry in the right position of the ordered entries array. 4142 OrderedEntries[E.getOrder()] = 4143 std::make_tuple(&E, SourceLocation(), MangledName); 4144 4145 // Add metadata to the named metadata node. 4146 MD->addOperand(llvm::MDNode::get(C, Ops)); 4147 }; 4148 4149 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4150 DeviceGlobalVarMetadataEmitter); 4151 4152 for (const auto &E : OrderedEntries) { 4153 assert(std::get<0>(E) && "All ordered entries must exist!"); 4154 if (const auto *CE = 4155 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4156 std::get<0>(E))) { 4157 if (!CE->getID() || !CE->getAddress()) { 4158 // Do not blame the entry if the parent funtion is not emitted. 4159 StringRef FnName = ParentFunctions[CE->getOrder()]; 4160 if (!CGM.GetGlobalValue(FnName)) 4161 continue; 4162 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4163 DiagnosticsEngine::Error, 4164 "Offloading entry for target region in %0 is incorrect: either the " 4165 "address or the ID is invalid."); 4166 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4167 continue; 4168 } 4169 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4170 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4171 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4172 OffloadEntryInfoDeviceGlobalVar>( 4173 std::get<0>(E))) { 4174 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4175 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4176 CE->getFlags()); 4177 switch (Flags) { 4178 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4179 if (CGM.getLangOpts().OpenMPIsDevice && 4180 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4181 continue; 4182 if (!CE->getAddress()) { 4183 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4184 DiagnosticsEngine::Error, "Offloading entry for declare target " 4185 "variable %0 is incorrect: the " 4186 "address is invalid."); 4187 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4188 continue; 4189 } 4190 // The vaiable has no definition - no need to add the entry. 4191 if (CE->getVarSize().isZero()) 4192 continue; 4193 break; 4194 } 4195 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4196 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4197 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4198 "Declaret target link address is set."); 4199 if (CGM.getLangOpts().OpenMPIsDevice) 4200 continue; 4201 if (!CE->getAddress()) { 4202 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4203 DiagnosticsEngine::Error, 4204 "Offloading entry for declare target variable is incorrect: the " 4205 "address is invalid."); 4206 CGM.getDiags().Report(DiagID); 4207 continue; 4208 } 4209 break; 4210 } 4211 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4212 CE->getVarSize().getQuantity(), Flags, 4213 CE->getLinkage()); 4214 } else { 4215 llvm_unreachable("Unsupported entry kind."); 4216 } 4217 } 4218 } 4219 4220 /// Loads all the offload entries information from the host IR 4221 /// metadata. 4222 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4223 // If we are in target mode, load the metadata from the host IR. This code has 4224 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 4225 4226 if (!CGM.getLangOpts().OpenMPIsDevice) 4227 return; 4228 4229 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4230 return; 4231 4232 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4233 if (auto EC = Buf.getError()) { 4234 CGM.getDiags().Report(diag::err_cannot_open_file) 4235 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4236 return; 4237 } 4238 4239 llvm::LLVMContext C; 4240 auto ME = expectedToErrorOrAndEmitErrors( 4241 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4242 4243 if (auto EC = ME.getError()) { 4244 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4245 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4246 CGM.getDiags().Report(DiagID) 4247 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4248 return; 4249 } 4250 4251 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4252 if (!MD) 4253 return; 4254 4255 for (llvm::MDNode *MN : MD->operands()) { 4256 auto &&GetMDInt = [MN](unsigned Idx) { 4257 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4258 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4259 }; 4260 4261 auto &&GetMDString = [MN](unsigned Idx) { 4262 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4263 return V->getString(); 4264 }; 4265 4266 switch (GetMDInt(0)) { 4267 default: 4268 llvm_unreachable("Unexpected metadata!"); 4269 break; 4270 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4271 OffloadingEntryInfoTargetRegion: 4272 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4273 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4274 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4275 /*Order=*/GetMDInt(5)); 4276 break; 4277 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4278 OffloadingEntryInfoDeviceGlobalVar: 4279 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4280 /*MangledName=*/GetMDString(1), 4281 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4282 /*Flags=*/GetMDInt(2)), 4283 /*Order=*/GetMDInt(3)); 4284 break; 4285 } 4286 } 4287 } 4288 4289 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4290 if (!KmpRoutineEntryPtrTy) { 4291 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4292 ASTContext &C = CGM.getContext(); 4293 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4294 FunctionProtoType::ExtProtoInfo EPI; 4295 KmpRoutineEntryPtrQTy = C.getPointerType( 4296 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4297 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4298 } 4299 } 4300 4301 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4302 // Make sure the type of the entry is already created. This is the type we 4303 // have to create: 4304 // struct __tgt_offload_entry{ 4305 // void *addr; // Pointer to the offload entry info. 4306 // // (function or global) 4307 // char *name; // Name of the function or global. 4308 // size_t size; // Size of the entry info (0 if it a function). 4309 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4310 // int32_t reserved; // Reserved, to use by the runtime library. 4311 // }; 4312 if (TgtOffloadEntryQTy.isNull()) { 4313 ASTContext &C = CGM.getContext(); 4314 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4315 RD->startDefinition(); 4316 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4317 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4318 addFieldToRecordDecl(C, RD, C.getSizeType()); 4319 addFieldToRecordDecl( 4320 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4321 addFieldToRecordDecl( 4322 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4323 RD->completeDefinition(); 4324 RD->addAttr(PackedAttr::CreateImplicit(C)); 4325 TgtOffloadEntryQTy = C.getRecordType(RD); 4326 } 4327 return TgtOffloadEntryQTy; 4328 } 4329 4330 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4331 // These are the types we need to build: 4332 // struct __tgt_device_image{ 4333 // void *ImageStart; // Pointer to the target code start. 4334 // void *ImageEnd; // Pointer to the target code end. 4335 // // We also add the host entries to the device image, as it may be useful 4336 // // for the target runtime to have access to that information. 4337 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4338 // // the entries. 4339 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4340 // // entries (non inclusive). 4341 // }; 4342 if (TgtDeviceImageQTy.isNull()) { 4343 ASTContext &C = CGM.getContext(); 4344 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4345 RD->startDefinition(); 4346 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4347 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4348 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4349 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4350 RD->completeDefinition(); 4351 TgtDeviceImageQTy = C.getRecordType(RD); 4352 } 4353 return TgtDeviceImageQTy; 4354 } 4355 4356 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4357 // struct __tgt_bin_desc{ 4358 // int32_t NumDevices; // Number of devices supported. 4359 // __tgt_device_image *DeviceImages; // Arrays of device images 4360 // // (one per device). 4361 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4362 // // entries. 4363 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4364 // // entries (non inclusive). 4365 // }; 4366 if (TgtBinaryDescriptorQTy.isNull()) { 4367 ASTContext &C = CGM.getContext(); 4368 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4369 RD->startDefinition(); 4370 addFieldToRecordDecl( 4371 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4372 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4373 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4374 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4375 RD->completeDefinition(); 4376 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4377 } 4378 return TgtBinaryDescriptorQTy; 4379 } 4380 4381 namespace { 4382 struct PrivateHelpersTy { 4383 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4384 const VarDecl *PrivateElemInit) 4385 : Original(Original), PrivateCopy(PrivateCopy), 4386 PrivateElemInit(PrivateElemInit) {} 4387 const VarDecl *Original; 4388 const VarDecl *PrivateCopy; 4389 const VarDecl *PrivateElemInit; 4390 }; 4391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4392 } // anonymous namespace 4393 4394 static RecordDecl * 4395 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4396 if (!Privates.empty()) { 4397 ASTContext &C = CGM.getContext(); 4398 // Build struct .kmp_privates_t. { 4399 // /* private vars */ 4400 // }; 4401 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4402 RD->startDefinition(); 4403 for (const auto &Pair : Privates) { 4404 const VarDecl *VD = Pair.second.Original; 4405 QualType Type = VD->getType().getNonReferenceType(); 4406 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4407 if (VD->hasAttrs()) { 4408 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4409 E(VD->getAttrs().end()); 4410 I != E; ++I) 4411 FD->addAttr(*I); 4412 } 4413 } 4414 RD->completeDefinition(); 4415 return RD; 4416 } 4417 return nullptr; 4418 } 4419 4420 static RecordDecl * 4421 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4422 QualType KmpInt32Ty, 4423 QualType KmpRoutineEntryPointerQTy) { 4424 ASTContext &C = CGM.getContext(); 4425 // Build struct kmp_task_t { 4426 // void * shareds; 4427 // kmp_routine_entry_t routine; 4428 // kmp_int32 part_id; 4429 // kmp_cmplrdata_t data1; 4430 // kmp_cmplrdata_t data2; 4431 // For taskloops additional fields: 4432 // kmp_uint64 lb; 4433 // kmp_uint64 ub; 4434 // kmp_int64 st; 4435 // kmp_int32 liter; 4436 // void * reductions; 4437 // }; 4438 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4439 UD->startDefinition(); 4440 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4441 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4442 UD->completeDefinition(); 4443 QualType KmpCmplrdataTy = C.getRecordType(UD); 4444 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4445 RD->startDefinition(); 4446 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4447 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4448 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4449 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4451 if (isOpenMPTaskLoopDirective(Kind)) { 4452 QualType KmpUInt64Ty = 4453 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4454 QualType KmpInt64Ty = 4455 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4456 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4458 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4459 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4460 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4461 } 4462 RD->completeDefinition(); 4463 return RD; 4464 } 4465 4466 static RecordDecl * 4467 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4468 ArrayRef<PrivateDataTy> Privates) { 4469 ASTContext &C = CGM.getContext(); 4470 // Build struct kmp_task_t_with_privates { 4471 // kmp_task_t task_data; 4472 // .kmp_privates_t. privates; 4473 // }; 4474 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4475 RD->startDefinition(); 4476 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4477 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4478 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4479 RD->completeDefinition(); 4480 return RD; 4481 } 4482 4483 /// Emit a proxy function which accepts kmp_task_t as the second 4484 /// argument. 4485 /// \code 4486 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4487 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4488 /// For taskloops: 4489 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4490 /// tt->reductions, tt->shareds); 4491 /// return 0; 4492 /// } 4493 /// \endcode 4494 static llvm::Function * 4495 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4496 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4497 QualType KmpTaskTWithPrivatesPtrQTy, 4498 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4499 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4500 llvm::Value *TaskPrivatesMap) { 4501 ASTContext &C = CGM.getContext(); 4502 FunctionArgList Args; 4503 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4504 ImplicitParamDecl::Other); 4505 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4506 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4507 ImplicitParamDecl::Other); 4508 Args.push_back(&GtidArg); 4509 Args.push_back(&TaskTypeArg); 4510 const auto &TaskEntryFnInfo = 4511 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4512 llvm::FunctionType *TaskEntryTy = 4513 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4514 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4515 auto *TaskEntry = llvm::Function::Create( 4516 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4517 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4518 TaskEntry->setDoesNotRecurse(); 4519 CodeGenFunction CGF(CGM); 4520 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4521 Loc, Loc); 4522 4523 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4524 // tt, 4525 // For taskloops: 4526 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4527 // tt->task_data.shareds); 4528 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4529 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4530 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4531 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4532 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4533 const auto *KmpTaskTWithPrivatesQTyRD = 4534 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4535 LValue Base = 4536 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4537 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4538 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4539 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4540 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4541 4542 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4543 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4544 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4545 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4546 CGF.ConvertTypeForMem(SharedsPtrTy)); 4547 4548 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4549 llvm::Value *PrivatesParam; 4550 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4551 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4552 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4553 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4554 } else { 4555 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4556 } 4557 4558 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4559 TaskPrivatesMap, 4560 CGF.Builder 4561 .CreatePointerBitCastOrAddrSpaceCast( 4562 TDBase.getAddress(), CGF.VoidPtrTy) 4563 .getPointer()}; 4564 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4565 std::end(CommonArgs)); 4566 if (isOpenMPTaskLoopDirective(Kind)) { 4567 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4568 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4569 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4570 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4571 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4572 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4573 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4574 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4575 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4576 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4577 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4578 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4579 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4580 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4581 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4582 CallArgs.push_back(LBParam); 4583 CallArgs.push_back(UBParam); 4584 CallArgs.push_back(StParam); 4585 CallArgs.push_back(LIParam); 4586 CallArgs.push_back(RParam); 4587 } 4588 CallArgs.push_back(SharedsParam); 4589 4590 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4591 CallArgs); 4592 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4593 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4594 CGF.FinishFunction(); 4595 return TaskEntry; 4596 } 4597 4598 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4599 SourceLocation Loc, 4600 QualType KmpInt32Ty, 4601 QualType KmpTaskTWithPrivatesPtrQTy, 4602 QualType KmpTaskTWithPrivatesQTy) { 4603 ASTContext &C = CGM.getContext(); 4604 FunctionArgList Args; 4605 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4606 ImplicitParamDecl::Other); 4607 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4608 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4609 ImplicitParamDecl::Other); 4610 Args.push_back(&GtidArg); 4611 Args.push_back(&TaskTypeArg); 4612 const auto &DestructorFnInfo = 4613 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4614 llvm::FunctionType *DestructorFnTy = 4615 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4616 std::string Name = 4617 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4618 auto *DestructorFn = 4619 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4620 Name, &CGM.getModule()); 4621 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4622 DestructorFnInfo); 4623 DestructorFn->setDoesNotRecurse(); 4624 CodeGenFunction CGF(CGM); 4625 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4626 Args, Loc, Loc); 4627 4628 LValue Base = CGF.EmitLoadOfPointerLValue( 4629 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4630 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4631 const auto *KmpTaskTWithPrivatesQTyRD = 4632 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4633 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4634 Base = CGF.EmitLValueForField(Base, *FI); 4635 for (const auto *Field : 4636 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4637 if (QualType::DestructionKind DtorKind = 4638 Field->getType().isDestructedType()) { 4639 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4640 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4641 } 4642 } 4643 CGF.FinishFunction(); 4644 return DestructorFn; 4645 } 4646 4647 /// Emit a privates mapping function for correct handling of private and 4648 /// firstprivate variables. 4649 /// \code 4650 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 4651 /// **noalias priv1,..., <tyn> **noalias privn) { 4652 /// *priv1 = &.privates.priv1; 4653 /// ...; 4654 /// *privn = &.privates.privn; 4655 /// } 4656 /// \endcode 4657 static llvm::Value * 4658 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4659 ArrayRef<const Expr *> PrivateVars, 4660 ArrayRef<const Expr *> FirstprivateVars, 4661 ArrayRef<const Expr *> LastprivateVars, 4662 QualType PrivatesQTy, 4663 ArrayRef<PrivateDataTy> Privates) { 4664 ASTContext &C = CGM.getContext(); 4665 FunctionArgList Args; 4666 ImplicitParamDecl TaskPrivatesArg( 4667 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4668 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4669 ImplicitParamDecl::Other); 4670 Args.push_back(&TaskPrivatesArg); 4671 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4672 unsigned Counter = 1; 4673 for (const Expr *E : PrivateVars) { 4674 Args.push_back(ImplicitParamDecl::Create( 4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4676 C.getPointerType(C.getPointerType(E->getType())) 4677 .withConst() 4678 .withRestrict(), 4679 ImplicitParamDecl::Other)); 4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4681 PrivateVarsPos[VD] = Counter; 4682 ++Counter; 4683 } 4684 for (const Expr *E : FirstprivateVars) { 4685 Args.push_back(ImplicitParamDecl::Create( 4686 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4687 C.getPointerType(C.getPointerType(E->getType())) 4688 .withConst() 4689 .withRestrict(), 4690 ImplicitParamDecl::Other)); 4691 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4692 PrivateVarsPos[VD] = Counter; 4693 ++Counter; 4694 } 4695 for (const Expr *E : LastprivateVars) { 4696 Args.push_back(ImplicitParamDecl::Create( 4697 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4698 C.getPointerType(C.getPointerType(E->getType())) 4699 .withConst() 4700 .withRestrict(), 4701 ImplicitParamDecl::Other)); 4702 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4703 PrivateVarsPos[VD] = Counter; 4704 ++Counter; 4705 } 4706 const auto &TaskPrivatesMapFnInfo = 4707 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4708 llvm::FunctionType *TaskPrivatesMapTy = 4709 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4710 std::string Name = 4711 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4712 auto *TaskPrivatesMap = llvm::Function::Create( 4713 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4714 &CGM.getModule()); 4715 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4716 TaskPrivatesMapFnInfo); 4717 if (CGM.getLangOpts().Optimize) { 4718 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4719 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4720 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4721 } 4722 CodeGenFunction CGF(CGM); 4723 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4724 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4725 4726 // *privi = &.privates.privi; 4727 LValue Base = CGF.EmitLoadOfPointerLValue( 4728 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4729 TaskPrivatesArg.getType()->castAs<PointerType>()); 4730 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4731 Counter = 0; 4732 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4733 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4734 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4735 LValue RefLVal = 4736 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4737 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4738 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4739 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4740 ++Counter; 4741 } 4742 CGF.FinishFunction(); 4743 return TaskPrivatesMap; 4744 } 4745 4746 /// Emit initialization for private variables in task-based directives. 4747 static void emitPrivatesInit(CodeGenFunction &CGF, 4748 const OMPExecutableDirective &D, 4749 Address KmpTaskSharedsPtr, LValue TDBase, 4750 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4751 QualType SharedsTy, QualType SharedsPtrTy, 4752 const OMPTaskDataTy &Data, 4753 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4754 ASTContext &C = CGF.getContext(); 4755 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4756 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4757 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4758 ? OMPD_taskloop 4759 : OMPD_task; 4760 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4761 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4762 LValue SrcBase; 4763 bool IsTargetTask = 4764 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4765 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4766 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4767 // PointersArray and SizesArray. The original variables for these arrays are 4768 // not captured and we get their addresses explicitly. 4769 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4770 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4771 SrcBase = CGF.MakeAddrLValue( 4772 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4773 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4774 SharedsTy); 4775 } 4776 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4777 for (const PrivateDataTy &Pair : Privates) { 4778 const VarDecl *VD = Pair.second.PrivateCopy; 4779 const Expr *Init = VD->getAnyInitializer(); 4780 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4781 !CGF.isTrivialInitializer(Init)))) { 4782 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4783 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4784 const VarDecl *OriginalVD = Pair.second.Original; 4785 // Check if the variable is the target-based BasePointersArray, 4786 // PointersArray or SizesArray. 4787 LValue SharedRefLValue; 4788 QualType Type = PrivateLValue.getType(); 4789 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4790 if (IsTargetTask && !SharedField) { 4791 assert(isa<ImplicitParamDecl>(OriginalVD) && 4792 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4793 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4794 ->getNumParams() == 0 && 4795 isa<TranslationUnitDecl>( 4796 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4797 ->getDeclContext()) && 4798 "Expected artificial target data variable."); 4799 SharedRefLValue = 4800 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4801 } else { 4802 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4803 SharedRefLValue = CGF.MakeAddrLValue( 4804 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4805 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4806 SharedRefLValue.getTBAAInfo()); 4807 } 4808 if (Type->isArrayType()) { 4809 // Initialize firstprivate array. 4810 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4811 // Perform simple memcpy. 4812 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4813 } else { 4814 // Initialize firstprivate array using element-by-element 4815 // initialization. 4816 CGF.EmitOMPAggregateAssign( 4817 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4818 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4819 Address SrcElement) { 4820 // Clean up any temporaries needed by the initialization. 4821 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4822 InitScope.addPrivate( 4823 Elem, [SrcElement]() -> Address { return SrcElement; }); 4824 (void)InitScope.Privatize(); 4825 // Emit initialization for single element. 4826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4827 CGF, &CapturesInfo); 4828 CGF.EmitAnyExprToMem(Init, DestElement, 4829 Init->getType().getQualifiers(), 4830 /*IsInitializer=*/false); 4831 }); 4832 } 4833 } else { 4834 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4835 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4836 return SharedRefLValue.getAddress(); 4837 }); 4838 (void)InitScope.Privatize(); 4839 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4840 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4841 /*capturedByInit=*/false); 4842 } 4843 } else { 4844 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4845 } 4846 } 4847 ++FI; 4848 } 4849 } 4850 4851 /// Check if duplication function is required for taskloops. 4852 static bool checkInitIsRequired(CodeGenFunction &CGF, 4853 ArrayRef<PrivateDataTy> Privates) { 4854 bool InitRequired = false; 4855 for (const PrivateDataTy &Pair : Privates) { 4856 const VarDecl *VD = Pair.second.PrivateCopy; 4857 const Expr *Init = VD->getAnyInitializer(); 4858 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4859 !CGF.isTrivialInitializer(Init)); 4860 if (InitRequired) 4861 break; 4862 } 4863 return InitRequired; 4864 } 4865 4866 4867 /// Emit task_dup function (for initialization of 4868 /// private/firstprivate/lastprivate vars and last_iter flag) 4869 /// \code 4870 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4871 /// lastpriv) { 4872 /// // setup lastprivate flag 4873 /// task_dst->last = lastpriv; 4874 /// // could be constructor calls here... 4875 /// } 4876 /// \endcode 4877 static llvm::Value * 4878 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4879 const OMPExecutableDirective &D, 4880 QualType KmpTaskTWithPrivatesPtrQTy, 4881 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4882 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4883 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4884 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4885 ASTContext &C = CGM.getContext(); 4886 FunctionArgList Args; 4887 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4888 KmpTaskTWithPrivatesPtrQTy, 4889 ImplicitParamDecl::Other); 4890 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4891 KmpTaskTWithPrivatesPtrQTy, 4892 ImplicitParamDecl::Other); 4893 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4894 ImplicitParamDecl::Other); 4895 Args.push_back(&DstArg); 4896 Args.push_back(&SrcArg); 4897 Args.push_back(&LastprivArg); 4898 const auto &TaskDupFnInfo = 4899 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4900 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4901 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4902 auto *TaskDup = llvm::Function::Create( 4903 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4904 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4905 TaskDup->setDoesNotRecurse(); 4906 CodeGenFunction CGF(CGM); 4907 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4908 Loc); 4909 4910 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4911 CGF.GetAddrOfLocalVar(&DstArg), 4912 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4913 // task_dst->liter = lastpriv; 4914 if (WithLastIter) { 4915 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4916 LValue Base = CGF.EmitLValueForField( 4917 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4918 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4919 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4920 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4921 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4922 } 4923 4924 // Emit initial values for private copies (if any). 4925 assert(!Privates.empty()); 4926 Address KmpTaskSharedsPtr = Address::invalid(); 4927 if (!Data.FirstprivateVars.empty()) { 4928 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4929 CGF.GetAddrOfLocalVar(&SrcArg), 4930 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4931 LValue Base = CGF.EmitLValueForField( 4932 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4933 KmpTaskSharedsPtr = Address( 4934 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4935 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4936 KmpTaskTShareds)), 4937 Loc), 4938 CGF.getNaturalTypeAlignment(SharedsTy)); 4939 } 4940 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4941 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4942 CGF.FinishFunction(); 4943 return TaskDup; 4944 } 4945 4946 /// Checks if destructor function is required to be generated. 4947 /// \return true if cleanups are required, false otherwise. 4948 static bool 4949 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4950 bool NeedsCleanup = false; 4951 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4952 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4953 for (const FieldDecl *FD : PrivateRD->fields()) { 4954 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4955 if (NeedsCleanup) 4956 break; 4957 } 4958 return NeedsCleanup; 4959 } 4960 4961 CGOpenMPRuntime::TaskResultTy 4962 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4963 const OMPExecutableDirective &D, 4964 llvm::Function *TaskFunction, QualType SharedsTy, 4965 Address Shareds, const OMPTaskDataTy &Data) { 4966 ASTContext &C = CGM.getContext(); 4967 llvm::SmallVector<PrivateDataTy, 4> Privates; 4968 // Aggregate privates and sort them by the alignment. 4969 auto I = Data.PrivateCopies.begin(); 4970 for (const Expr *E : Data.PrivateVars) { 4971 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4972 Privates.emplace_back( 4973 C.getDeclAlign(VD), 4974 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4975 /*PrivateElemInit=*/nullptr)); 4976 ++I; 4977 } 4978 I = Data.FirstprivateCopies.begin(); 4979 auto IElemInitRef = Data.FirstprivateInits.begin(); 4980 for (const Expr *E : Data.FirstprivateVars) { 4981 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4982 Privates.emplace_back( 4983 C.getDeclAlign(VD), 4984 PrivateHelpersTy( 4985 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4986 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4987 ++I; 4988 ++IElemInitRef; 4989 } 4990 I = Data.LastprivateCopies.begin(); 4991 for (const Expr *E : Data.LastprivateVars) { 4992 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4993 Privates.emplace_back( 4994 C.getDeclAlign(VD), 4995 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4996 /*PrivateElemInit=*/nullptr)); 4997 ++I; 4998 } 4999 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5000 return L.first > R.first; 5001 }); 5002 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5003 // Build type kmp_routine_entry_t (if not built yet). 5004 emitKmpRoutineEntryT(KmpInt32Ty); 5005 // Build type kmp_task_t (if not built yet). 5006 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5007 if (SavedKmpTaskloopTQTy.isNull()) { 5008 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5009 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5010 } 5011 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5012 } else { 5013 assert((D.getDirectiveKind() == OMPD_task || 5014 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5015 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5016 "Expected taskloop, task or target directive"); 5017 if (SavedKmpTaskTQTy.isNull()) { 5018 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5019 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5020 } 5021 KmpTaskTQTy = SavedKmpTaskTQTy; 5022 } 5023 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5024 // Build particular struct kmp_task_t for the given task. 5025 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5026 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5027 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5028 QualType KmpTaskTWithPrivatesPtrQTy = 5029 C.getPointerType(KmpTaskTWithPrivatesQTy); 5030 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5031 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5032 KmpTaskTWithPrivatesTy->getPointerTo(); 5033 llvm::Value *KmpTaskTWithPrivatesTySize = 5034 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5035 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5036 5037 // Emit initial values for private copies (if any). 5038 llvm::Value *TaskPrivatesMap = nullptr; 5039 llvm::Type *TaskPrivatesMapTy = 5040 std::next(TaskFunction->arg_begin(), 3)->getType(); 5041 if (!Privates.empty()) { 5042 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5043 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5044 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5045 FI->getType(), Privates); 5046 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5047 TaskPrivatesMap, TaskPrivatesMapTy); 5048 } else { 5049 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5050 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5051 } 5052 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5053 // kmp_task_t *tt); 5054 llvm::Function *TaskEntry = emitProxyTaskFunction( 5055 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5056 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5057 TaskPrivatesMap); 5058 5059 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5060 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5061 // kmp_routine_entry_t *task_entry); 5062 // Task flags. Format is taken from 5063 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5064 // description of kmp_tasking_flags struct. 5065 enum { 5066 TiedFlag = 0x1, 5067 FinalFlag = 0x2, 5068 DestructorsFlag = 0x8, 5069 PriorityFlag = 0x20 5070 }; 5071 unsigned Flags = Data.Tied ? TiedFlag : 0; 5072 bool NeedsCleanup = false; 5073 if (!Privates.empty()) { 5074 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5075 if (NeedsCleanup) 5076 Flags = Flags | DestructorsFlag; 5077 } 5078 if (Data.Priority.getInt()) 5079 Flags = Flags | PriorityFlag; 5080 llvm::Value *TaskFlags = 5081 Data.Final.getPointer() 5082 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5083 CGF.Builder.getInt32(FinalFlag), 5084 CGF.Builder.getInt32(/*C=*/0)) 5085 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5086 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5087 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5088 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5089 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5090 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5091 TaskEntry, KmpRoutineEntryPtrTy)}; 5092 llvm::Value *NewTask; 5093 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5094 // Check if we have any device clause associated with the directive. 5095 const Expr *Device = nullptr; 5096 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5097 Device = C->getDevice(); 5098 // Emit device ID if any otherwise use default value. 5099 llvm::Value *DeviceID; 5100 if (Device) 5101 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5102 CGF.Int64Ty, /*isSigned=*/true); 5103 else 5104 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5105 AllocArgs.push_back(DeviceID); 5106 NewTask = CGF.EmitRuntimeCall( 5107 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5108 } else { 5109 NewTask = CGF.EmitRuntimeCall( 5110 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5111 } 5112 llvm::Value *NewTaskNewTaskTTy = 5113 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5114 NewTask, KmpTaskTWithPrivatesPtrTy); 5115 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5116 KmpTaskTWithPrivatesQTy); 5117 LValue TDBase = 5118 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5119 // Fill the data in the resulting kmp_task_t record. 5120 // Copy shareds if there are any. 5121 Address KmpTaskSharedsPtr = Address::invalid(); 5122 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5123 KmpTaskSharedsPtr = 5124 Address(CGF.EmitLoadOfScalar( 5125 CGF.EmitLValueForField( 5126 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5127 KmpTaskTShareds)), 5128 Loc), 5129 CGF.getNaturalTypeAlignment(SharedsTy)); 5130 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5131 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5132 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5133 } 5134 // Emit initial values for private copies (if any). 5135 TaskResultTy Result; 5136 if (!Privates.empty()) { 5137 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5138 SharedsTy, SharedsPtrTy, Data, Privates, 5139 /*ForDup=*/false); 5140 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5141 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5142 Result.TaskDupFn = emitTaskDupFunction( 5143 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5144 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5145 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5146 } 5147 } 5148 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5149 enum { Priority = 0, Destructors = 1 }; 5150 // Provide pointer to function with destructors for privates. 5151 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5152 const RecordDecl *KmpCmplrdataUD = 5153 (*FI)->getType()->getAsUnionType()->getDecl(); 5154 if (NeedsCleanup) { 5155 llvm::Value *DestructorFn = emitDestructorsFunction( 5156 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5157 KmpTaskTWithPrivatesQTy); 5158 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5159 LValue DestructorsLV = CGF.EmitLValueForField( 5160 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5161 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5162 DestructorFn, KmpRoutineEntryPtrTy), 5163 DestructorsLV); 5164 } 5165 // Set priority. 5166 if (Data.Priority.getInt()) { 5167 LValue Data2LV = CGF.EmitLValueForField( 5168 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5169 LValue PriorityLV = CGF.EmitLValueForField( 5170 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5171 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5172 } 5173 Result.NewTask = NewTask; 5174 Result.TaskEntry = TaskEntry; 5175 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5176 Result.TDBase = TDBase; 5177 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5178 return Result; 5179 } 5180 5181 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5182 const OMPExecutableDirective &D, 5183 llvm::Function *TaskFunction, 5184 QualType SharedsTy, Address Shareds, 5185 const Expr *IfCond, 5186 const OMPTaskDataTy &Data) { 5187 if (!CGF.HaveInsertPoint()) 5188 return; 5189 5190 TaskResultTy Result = 5191 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5192 llvm::Value *NewTask = Result.NewTask; 5193 llvm::Function *TaskEntry = Result.TaskEntry; 5194 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5195 LValue TDBase = Result.TDBase; 5196 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5197 ASTContext &C = CGM.getContext(); 5198 // Process list of dependences. 5199 Address DependenciesArray = Address::invalid(); 5200 unsigned NumDependencies = Data.Dependences.size(); 5201 if (NumDependencies) { 5202 // Dependence kind for RTL. 5203 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5204 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5205 RecordDecl *KmpDependInfoRD; 5206 QualType FlagsTy = 5207 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5208 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5209 if (KmpDependInfoTy.isNull()) { 5210 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5211 KmpDependInfoRD->startDefinition(); 5212 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5213 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5214 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5215 KmpDependInfoRD->completeDefinition(); 5216 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5217 } else { 5218 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5219 } 5220 // Define type kmp_depend_info[<Dependences.size()>]; 5221 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5222 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5223 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5224 // kmp_depend_info[<Dependences.size()>] deps; 5225 DependenciesArray = 5226 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5227 for (unsigned I = 0; I < NumDependencies; ++I) { 5228 const Expr *E = Data.Dependences[I].second; 5229 LValue Addr = CGF.EmitLValue(E); 5230 llvm::Value *Size; 5231 QualType Ty = E->getType(); 5232 if (const auto *ASE = 5233 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5234 LValue UpAddrLVal = 5235 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5236 llvm::Value *UpAddr = 5237 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5238 llvm::Value *LowIntPtr = 5239 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5240 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5241 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5242 } else { 5243 Size = CGF.getTypeSize(Ty); 5244 } 5245 LValue Base = CGF.MakeAddrLValue( 5246 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5247 KmpDependInfoTy); 5248 // deps[i].base_addr = &<Dependences[i].second>; 5249 LValue BaseAddrLVal = CGF.EmitLValueForField( 5250 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5251 CGF.EmitStoreOfScalar( 5252 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5253 BaseAddrLVal); 5254 // deps[i].len = sizeof(<Dependences[i].second>); 5255 LValue LenLVal = CGF.EmitLValueForField( 5256 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5257 CGF.EmitStoreOfScalar(Size, LenLVal); 5258 // deps[i].flags = <Dependences[i].first>; 5259 RTLDependenceKindTy DepKind; 5260 switch (Data.Dependences[I].first) { 5261 case OMPC_DEPEND_in: 5262 DepKind = DepIn; 5263 break; 5264 // Out and InOut dependencies must use the same code. 5265 case OMPC_DEPEND_out: 5266 case OMPC_DEPEND_inout: 5267 DepKind = DepInOut; 5268 break; 5269 case OMPC_DEPEND_mutexinoutset: 5270 DepKind = DepMutexInOutSet; 5271 break; 5272 case OMPC_DEPEND_source: 5273 case OMPC_DEPEND_sink: 5274 case OMPC_DEPEND_unknown: 5275 llvm_unreachable("Unknown task dependence type"); 5276 } 5277 LValue FlagsLVal = CGF.EmitLValueForField( 5278 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5279 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5280 FlagsLVal); 5281 } 5282 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5283 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5284 } 5285 5286 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5287 // libcall. 5288 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5289 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5290 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5291 // list is not empty 5292 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5293 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5294 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5295 llvm::Value *DepTaskArgs[7]; 5296 if (NumDependencies) { 5297 DepTaskArgs[0] = UpLoc; 5298 DepTaskArgs[1] = ThreadID; 5299 DepTaskArgs[2] = NewTask; 5300 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5301 DepTaskArgs[4] = DependenciesArray.getPointer(); 5302 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5303 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5304 } 5305 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5306 &TaskArgs, 5307 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5308 if (!Data.Tied) { 5309 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5310 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5311 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5312 } 5313 if (NumDependencies) { 5314 CGF.EmitRuntimeCall( 5315 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5316 } else { 5317 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5318 TaskArgs); 5319 } 5320 // Check if parent region is untied and build return for untied task; 5321 if (auto *Region = 5322 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5323 Region->emitUntiedSwitch(CGF); 5324 }; 5325 5326 llvm::Value *DepWaitTaskArgs[6]; 5327 if (NumDependencies) { 5328 DepWaitTaskArgs[0] = UpLoc; 5329 DepWaitTaskArgs[1] = ThreadID; 5330 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5331 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5332 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5333 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5334 } 5335 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5336 NumDependencies, &DepWaitTaskArgs, 5337 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5338 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5339 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5340 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5341 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5342 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5343 // is specified. 5344 if (NumDependencies) 5345 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5346 DepWaitTaskArgs); 5347 // Call proxy_task_entry(gtid, new_task); 5348 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5349 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5350 Action.Enter(CGF); 5351 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5352 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5353 OutlinedFnArgs); 5354 }; 5355 5356 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5357 // kmp_task_t *new_task); 5358 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5359 // kmp_task_t *new_task); 5360 RegionCodeGenTy RCG(CodeGen); 5361 CommonActionTy Action( 5362 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5363 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5364 RCG.setAction(Action); 5365 RCG(CGF); 5366 }; 5367 5368 if (IfCond) { 5369 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5370 } else { 5371 RegionCodeGenTy ThenRCG(ThenCodeGen); 5372 ThenRCG(CGF); 5373 } 5374 } 5375 5376 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5377 const OMPLoopDirective &D, 5378 llvm::Function *TaskFunction, 5379 QualType SharedsTy, Address Shareds, 5380 const Expr *IfCond, 5381 const OMPTaskDataTy &Data) { 5382 if (!CGF.HaveInsertPoint()) 5383 return; 5384 TaskResultTy Result = 5385 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5386 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5387 // libcall. 5388 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5389 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5390 // sched, kmp_uint64 grainsize, void *task_dup); 5391 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5392 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5393 llvm::Value *IfVal; 5394 if (IfCond) { 5395 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5396 /*isSigned=*/true); 5397 } else { 5398 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5399 } 5400 5401 LValue LBLVal = CGF.EmitLValueForField( 5402 Result.TDBase, 5403 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5404 const auto *LBVar = 5405 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5406 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5407 /*IsInitializer=*/true); 5408 LValue UBLVal = CGF.EmitLValueForField( 5409 Result.TDBase, 5410 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5411 const auto *UBVar = 5412 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5413 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5414 /*IsInitializer=*/true); 5415 LValue StLVal = CGF.EmitLValueForField( 5416 Result.TDBase, 5417 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5418 const auto *StVar = 5419 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5420 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5421 /*IsInitializer=*/true); 5422 // Store reductions address. 5423 LValue RedLVal = CGF.EmitLValueForField( 5424 Result.TDBase, 5425 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5426 if (Data.Reductions) { 5427 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5428 } else { 5429 CGF.EmitNullInitialization(RedLVal.getAddress(), 5430 CGF.getContext().VoidPtrTy); 5431 } 5432 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5433 llvm::Value *TaskArgs[] = { 5434 UpLoc, 5435 ThreadID, 5436 Result.NewTask, 5437 IfVal, 5438 LBLVal.getPointer(), 5439 UBLVal.getPointer(), 5440 CGF.EmitLoadOfScalar(StLVal, Loc), 5441 llvm::ConstantInt::getSigned( 5442 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5443 llvm::ConstantInt::getSigned( 5444 CGF.IntTy, Data.Schedule.getPointer() 5445 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5446 : NoSchedule), 5447 Data.Schedule.getPointer() 5448 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5449 /*isSigned=*/false) 5450 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5451 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5452 Result.TaskDupFn, CGF.VoidPtrTy) 5453 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5454 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5455 } 5456 5457 /// Emit reduction operation for each element of array (required for 5458 /// array sections) LHS op = RHS. 5459 /// \param Type Type of array. 5460 /// \param LHSVar Variable on the left side of the reduction operation 5461 /// (references element of array in original variable). 5462 /// \param RHSVar Variable on the right side of the reduction operation 5463 /// (references element of array in original variable). 5464 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5465 /// RHSVar. 5466 static void EmitOMPAggregateReduction( 5467 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5468 const VarDecl *RHSVar, 5469 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5470 const Expr *, const Expr *)> &RedOpGen, 5471 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5472 const Expr *UpExpr = nullptr) { 5473 // Perform element-by-element initialization. 5474 QualType ElementTy; 5475 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5476 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5477 5478 // Drill down to the base element type on both arrays. 5479 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5480 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5481 5482 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5483 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5484 // Cast from pointer to array type to pointer to single element. 5485 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5486 // The basic structure here is a while-do loop. 5487 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5488 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5489 llvm::Value *IsEmpty = 5490 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5491 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5492 5493 // Enter the loop body, making that address the current address. 5494 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5495 CGF.EmitBlock(BodyBB); 5496 5497 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5498 5499 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5500 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5501 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5502 Address RHSElementCurrent = 5503 Address(RHSElementPHI, 5504 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5505 5506 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5507 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5508 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5509 Address LHSElementCurrent = 5510 Address(LHSElementPHI, 5511 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5512 5513 // Emit copy. 5514 CodeGenFunction::OMPPrivateScope Scope(CGF); 5515 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5516 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5517 Scope.Privatize(); 5518 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5519 Scope.ForceCleanup(); 5520 5521 // Shift the address forward by one element. 5522 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5523 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5524 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5525 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5526 // Check whether we've reached the end. 5527 llvm::Value *Done = 5528 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5529 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5530 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5531 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5532 5533 // Done. 5534 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5535 } 5536 5537 /// Emit reduction combiner. If the combiner is a simple expression emit it as 5538 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5539 /// UDR combiner function. 5540 static void emitReductionCombiner(CodeGenFunction &CGF, 5541 const Expr *ReductionOp) { 5542 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5543 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5544 if (const auto *DRE = 5545 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5546 if (const auto *DRD = 5547 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5548 std::pair<llvm::Function *, llvm::Function *> Reduction = 5549 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5550 RValue Func = RValue::get(Reduction.first); 5551 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5552 CGF.EmitIgnoredExpr(ReductionOp); 5553 return; 5554 } 5555 CGF.EmitIgnoredExpr(ReductionOp); 5556 } 5557 5558 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5559 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5560 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5561 ArrayRef<const Expr *> ReductionOps) { 5562 ASTContext &C = CGM.getContext(); 5563 5564 // void reduction_func(void *LHSArg, void *RHSArg); 5565 FunctionArgList Args; 5566 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5567 ImplicitParamDecl::Other); 5568 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5569 ImplicitParamDecl::Other); 5570 Args.push_back(&LHSArg); 5571 Args.push_back(&RHSArg); 5572 const auto &CGFI = 5573 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5574 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5575 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5576 llvm::GlobalValue::InternalLinkage, Name, 5577 &CGM.getModule()); 5578 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5579 Fn->setDoesNotRecurse(); 5580 CodeGenFunction CGF(CGM); 5581 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5582 5583 // Dst = (void*[n])(LHSArg); 5584 // Src = (void*[n])(RHSArg); 5585 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5586 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5587 ArgsType), CGF.getPointerAlign()); 5588 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5589 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5590 ArgsType), CGF.getPointerAlign()); 5591 5592 // ... 5593 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5594 // ... 5595 CodeGenFunction::OMPPrivateScope Scope(CGF); 5596 auto IPriv = Privates.begin(); 5597 unsigned Idx = 0; 5598 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5599 const auto *RHSVar = 5600 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5601 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5602 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5603 }); 5604 const auto *LHSVar = 5605 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5606 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5607 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5608 }); 5609 QualType PrivTy = (*IPriv)->getType(); 5610 if (PrivTy->isVariablyModifiedType()) { 5611 // Get array size and emit VLA type. 5612 ++Idx; 5613 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5614 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5615 const VariableArrayType *VLA = 5616 CGF.getContext().getAsVariableArrayType(PrivTy); 5617 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5618 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5619 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5620 CGF.EmitVariablyModifiedType(PrivTy); 5621 } 5622 } 5623 Scope.Privatize(); 5624 IPriv = Privates.begin(); 5625 auto ILHS = LHSExprs.begin(); 5626 auto IRHS = RHSExprs.begin(); 5627 for (const Expr *E : ReductionOps) { 5628 if ((*IPriv)->getType()->isArrayType()) { 5629 // Emit reduction for array section. 5630 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5631 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5632 EmitOMPAggregateReduction( 5633 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5634 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5635 emitReductionCombiner(CGF, E); 5636 }); 5637 } else { 5638 // Emit reduction for array subscript or single variable. 5639 emitReductionCombiner(CGF, E); 5640 } 5641 ++IPriv; 5642 ++ILHS; 5643 ++IRHS; 5644 } 5645 Scope.ForceCleanup(); 5646 CGF.FinishFunction(); 5647 return Fn; 5648 } 5649 5650 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5651 const Expr *ReductionOp, 5652 const Expr *PrivateRef, 5653 const DeclRefExpr *LHS, 5654 const DeclRefExpr *RHS) { 5655 if (PrivateRef->getType()->isArrayType()) { 5656 // Emit reduction for array section. 5657 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5658 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5659 EmitOMPAggregateReduction( 5660 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5661 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5662 emitReductionCombiner(CGF, ReductionOp); 5663 }); 5664 } else { 5665 // Emit reduction for array subscript or single variable. 5666 emitReductionCombiner(CGF, ReductionOp); 5667 } 5668 } 5669 5670 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5671 ArrayRef<const Expr *> Privates, 5672 ArrayRef<const Expr *> LHSExprs, 5673 ArrayRef<const Expr *> RHSExprs, 5674 ArrayRef<const Expr *> ReductionOps, 5675 ReductionOptionsTy Options) { 5676 if (!CGF.HaveInsertPoint()) 5677 return; 5678 5679 bool WithNowait = Options.WithNowait; 5680 bool SimpleReduction = Options.SimpleReduction; 5681 5682 // Next code should be emitted for reduction: 5683 // 5684 // static kmp_critical_name lock = { 0 }; 5685 // 5686 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5687 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5688 // ... 5689 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5690 // *(Type<n>-1*)rhs[<n>-1]); 5691 // } 5692 // 5693 // ... 5694 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5695 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5696 // RedList, reduce_func, &<lock>)) { 5697 // case 1: 5698 // ... 5699 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5700 // ... 5701 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5702 // break; 5703 // case 2: 5704 // ... 5705 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5706 // ... 5707 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5708 // break; 5709 // default:; 5710 // } 5711 // 5712 // if SimpleReduction is true, only the next code is generated: 5713 // ... 5714 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5715 // ... 5716 5717 ASTContext &C = CGM.getContext(); 5718 5719 if (SimpleReduction) { 5720 CodeGenFunction::RunCleanupsScope Scope(CGF); 5721 auto IPriv = Privates.begin(); 5722 auto ILHS = LHSExprs.begin(); 5723 auto IRHS = RHSExprs.begin(); 5724 for (const Expr *E : ReductionOps) { 5725 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5726 cast<DeclRefExpr>(*IRHS)); 5727 ++IPriv; 5728 ++ILHS; 5729 ++IRHS; 5730 } 5731 return; 5732 } 5733 5734 // 1. Build a list of reduction variables. 5735 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5736 auto Size = RHSExprs.size(); 5737 for (const Expr *E : Privates) { 5738 if (E->getType()->isVariablyModifiedType()) 5739 // Reserve place for array size. 5740 ++Size; 5741 } 5742 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5743 QualType ReductionArrayTy = 5744 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5745 /*IndexTypeQuals=*/0); 5746 Address ReductionList = 5747 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5748 auto IPriv = Privates.begin(); 5749 unsigned Idx = 0; 5750 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5751 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5752 CGF.Builder.CreateStore( 5753 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5754 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5755 Elem); 5756 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5757 // Store array size. 5758 ++Idx; 5759 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5760 llvm::Value *Size = CGF.Builder.CreateIntCast( 5761 CGF.getVLASize( 5762 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5763 .NumElts, 5764 CGF.SizeTy, /*isSigned=*/false); 5765 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5766 Elem); 5767 } 5768 } 5769 5770 // 2. Emit reduce_func(). 5771 llvm::Function *ReductionFn = emitReductionFunction( 5772 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5773 LHSExprs, RHSExprs, ReductionOps); 5774 5775 // 3. Create static kmp_critical_name lock = { 0 }; 5776 std::string Name = getName({"reduction"}); 5777 llvm::Value *Lock = getCriticalRegionLock(Name); 5778 5779 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5780 // RedList, reduce_func, &<lock>); 5781 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5782 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5783 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5784 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5785 ReductionList.getPointer(), CGF.VoidPtrTy); 5786 llvm::Value *Args[] = { 5787 IdentTLoc, // ident_t *<loc> 5788 ThreadId, // i32 <gtid> 5789 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5790 ReductionArrayTySize, // size_type sizeof(RedList) 5791 RL, // void *RedList 5792 ReductionFn, // void (*) (void *, void *) <reduce_func> 5793 Lock // kmp_critical_name *&<lock> 5794 }; 5795 llvm::Value *Res = CGF.EmitRuntimeCall( 5796 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5797 : OMPRTL__kmpc_reduce), 5798 Args); 5799 5800 // 5. Build switch(res) 5801 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5802 llvm::SwitchInst *SwInst = 5803 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5804 5805 // 6. Build case 1: 5806 // ... 5807 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5808 // ... 5809 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5810 // break; 5811 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5812 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5813 CGF.EmitBlock(Case1BB); 5814 5815 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5816 llvm::Value *EndArgs[] = { 5817 IdentTLoc, // ident_t *<loc> 5818 ThreadId, // i32 <gtid> 5819 Lock // kmp_critical_name *&<lock> 5820 }; 5821 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5822 CodeGenFunction &CGF, PrePostActionTy &Action) { 5823 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5824 auto IPriv = Privates.begin(); 5825 auto ILHS = LHSExprs.begin(); 5826 auto IRHS = RHSExprs.begin(); 5827 for (const Expr *E : ReductionOps) { 5828 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5829 cast<DeclRefExpr>(*IRHS)); 5830 ++IPriv; 5831 ++ILHS; 5832 ++IRHS; 5833 } 5834 }; 5835 RegionCodeGenTy RCG(CodeGen); 5836 CommonActionTy Action( 5837 nullptr, llvm::None, 5838 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5839 : OMPRTL__kmpc_end_reduce), 5840 EndArgs); 5841 RCG.setAction(Action); 5842 RCG(CGF); 5843 5844 CGF.EmitBranch(DefaultBB); 5845 5846 // 7. Build case 2: 5847 // ... 5848 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5849 // ... 5850 // break; 5851 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5852 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5853 CGF.EmitBlock(Case2BB); 5854 5855 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5856 CodeGenFunction &CGF, PrePostActionTy &Action) { 5857 auto ILHS = LHSExprs.begin(); 5858 auto IRHS = RHSExprs.begin(); 5859 auto IPriv = Privates.begin(); 5860 for (const Expr *E : ReductionOps) { 5861 const Expr *XExpr = nullptr; 5862 const Expr *EExpr = nullptr; 5863 const Expr *UpExpr = nullptr; 5864 BinaryOperatorKind BO = BO_Comma; 5865 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5866 if (BO->getOpcode() == BO_Assign) { 5867 XExpr = BO->getLHS(); 5868 UpExpr = BO->getRHS(); 5869 } 5870 } 5871 // Try to emit update expression as a simple atomic. 5872 const Expr *RHSExpr = UpExpr; 5873 if (RHSExpr) { 5874 // Analyze RHS part of the whole expression. 5875 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5876 RHSExpr->IgnoreParenImpCasts())) { 5877 // If this is a conditional operator, analyze its condition for 5878 // min/max reduction operator. 5879 RHSExpr = ACO->getCond(); 5880 } 5881 if (const auto *BORHS = 5882 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5883 EExpr = BORHS->getRHS(); 5884 BO = BORHS->getOpcode(); 5885 } 5886 } 5887 if (XExpr) { 5888 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5889 auto &&AtomicRedGen = [BO, VD, 5890 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5891 const Expr *EExpr, const Expr *UpExpr) { 5892 LValue X = CGF.EmitLValue(XExpr); 5893 RValue E; 5894 if (EExpr) 5895 E = CGF.EmitAnyExpr(EExpr); 5896 CGF.EmitOMPAtomicSimpleUpdateExpr( 5897 X, E, BO, /*IsXLHSInRHSPart=*/true, 5898 llvm::AtomicOrdering::Monotonic, Loc, 5899 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5900 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5901 PrivateScope.addPrivate( 5902 VD, [&CGF, VD, XRValue, Loc]() { 5903 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5904 CGF.emitOMPSimpleStore( 5905 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5906 VD->getType().getNonReferenceType(), Loc); 5907 return LHSTemp; 5908 }); 5909 (void)PrivateScope.Privatize(); 5910 return CGF.EmitAnyExpr(UpExpr); 5911 }); 5912 }; 5913 if ((*IPriv)->getType()->isArrayType()) { 5914 // Emit atomic reduction for array section. 5915 const auto *RHSVar = 5916 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5917 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5918 AtomicRedGen, XExpr, EExpr, UpExpr); 5919 } else { 5920 // Emit atomic reduction for array subscript or single variable. 5921 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5922 } 5923 } else { 5924 // Emit as a critical region. 5925 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5926 const Expr *, const Expr *) { 5927 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5928 std::string Name = RT.getName({"atomic_reduction"}); 5929 RT.emitCriticalRegion( 5930 CGF, Name, 5931 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5932 Action.Enter(CGF); 5933 emitReductionCombiner(CGF, E); 5934 }, 5935 Loc); 5936 }; 5937 if ((*IPriv)->getType()->isArrayType()) { 5938 const auto *LHSVar = 5939 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5940 const auto *RHSVar = 5941 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5942 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5943 CritRedGen); 5944 } else { 5945 CritRedGen(CGF, nullptr, nullptr, nullptr); 5946 } 5947 } 5948 ++ILHS; 5949 ++IRHS; 5950 ++IPriv; 5951 } 5952 }; 5953 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5954 if (!WithNowait) { 5955 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5956 llvm::Value *EndArgs[] = { 5957 IdentTLoc, // ident_t *<loc> 5958 ThreadId, // i32 <gtid> 5959 Lock // kmp_critical_name *&<lock> 5960 }; 5961 CommonActionTy Action(nullptr, llvm::None, 5962 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5963 EndArgs); 5964 AtomicRCG.setAction(Action); 5965 AtomicRCG(CGF); 5966 } else { 5967 AtomicRCG(CGF); 5968 } 5969 5970 CGF.EmitBranch(DefaultBB); 5971 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5972 } 5973 5974 /// Generates unique name for artificial threadprivate variables. 5975 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5976 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5977 const Expr *Ref) { 5978 SmallString<256> Buffer; 5979 llvm::raw_svector_ostream Out(Buffer); 5980 const clang::DeclRefExpr *DE; 5981 const VarDecl *D = ::getBaseDecl(Ref, DE); 5982 if (!D) 5983 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5984 D = D->getCanonicalDecl(); 5985 std::string Name = CGM.getOpenMPRuntime().getName( 5986 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5987 Out << Prefix << Name << "_" 5988 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5989 return Out.str(); 5990 } 5991 5992 /// Emits reduction initializer function: 5993 /// \code 5994 /// void @.red_init(void* %arg) { 5995 /// %0 = bitcast void* %arg to <type>* 5996 /// store <type> <init>, <type>* %0 5997 /// ret void 5998 /// } 5999 /// \endcode 6000 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6001 SourceLocation Loc, 6002 ReductionCodeGen &RCG, unsigned N) { 6003 ASTContext &C = CGM.getContext(); 6004 FunctionArgList Args; 6005 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6006 ImplicitParamDecl::Other); 6007 Args.emplace_back(&Param); 6008 const auto &FnInfo = 6009 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6010 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6011 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6012 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6013 Name, &CGM.getModule()); 6014 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6015 Fn->setDoesNotRecurse(); 6016 CodeGenFunction CGF(CGM); 6017 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6018 Address PrivateAddr = CGF.EmitLoadOfPointer( 6019 CGF.GetAddrOfLocalVar(&Param), 6020 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6021 llvm::Value *Size = nullptr; 6022 // If the size of the reduction item is non-constant, load it from global 6023 // threadprivate variable. 6024 if (RCG.getSizes(N).second) { 6025 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6026 CGF, CGM.getContext().getSizeType(), 6027 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6028 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6029 CGM.getContext().getSizeType(), Loc); 6030 } 6031 RCG.emitAggregateType(CGF, N, Size); 6032 LValue SharedLVal; 6033 // If initializer uses initializer from declare reduction construct, emit a 6034 // pointer to the address of the original reduction item (reuired by reduction 6035 // initializer) 6036 if (RCG.usesReductionInitializer(N)) { 6037 Address SharedAddr = 6038 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6039 CGF, CGM.getContext().VoidPtrTy, 6040 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6041 SharedAddr = CGF.EmitLoadOfPointer( 6042 SharedAddr, 6043 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6044 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6045 } else { 6046 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6047 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6048 CGM.getContext().VoidPtrTy); 6049 } 6050 // Emit the initializer: 6051 // %0 = bitcast void* %arg to <type>* 6052 // store <type> <init>, <type>* %0 6053 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6054 [](CodeGenFunction &) { return false; }); 6055 CGF.FinishFunction(); 6056 return Fn; 6057 } 6058 6059 /// Emits reduction combiner function: 6060 /// \code 6061 /// void @.red_comb(void* %arg0, void* %arg1) { 6062 /// %lhs = bitcast void* %arg0 to <type>* 6063 /// %rhs = bitcast void* %arg1 to <type>* 6064 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6065 /// store <type> %2, <type>* %lhs 6066 /// ret void 6067 /// } 6068 /// \endcode 6069 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6070 SourceLocation Loc, 6071 ReductionCodeGen &RCG, unsigned N, 6072 const Expr *ReductionOp, 6073 const Expr *LHS, const Expr *RHS, 6074 const Expr *PrivateRef) { 6075 ASTContext &C = CGM.getContext(); 6076 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6077 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6078 FunctionArgList Args; 6079 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6080 C.VoidPtrTy, ImplicitParamDecl::Other); 6081 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6082 ImplicitParamDecl::Other); 6083 Args.emplace_back(&ParamInOut); 6084 Args.emplace_back(&ParamIn); 6085 const auto &FnInfo = 6086 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6087 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6088 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6089 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6090 Name, &CGM.getModule()); 6091 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6092 Fn->setDoesNotRecurse(); 6093 CodeGenFunction CGF(CGM); 6094 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6095 llvm::Value *Size = nullptr; 6096 // If the size of the reduction item is non-constant, load it from global 6097 // threadprivate variable. 6098 if (RCG.getSizes(N).second) { 6099 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6100 CGF, CGM.getContext().getSizeType(), 6101 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6102 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6103 CGM.getContext().getSizeType(), Loc); 6104 } 6105 RCG.emitAggregateType(CGF, N, Size); 6106 // Remap lhs and rhs variables to the addresses of the function arguments. 6107 // %lhs = bitcast void* %arg0 to <type>* 6108 // %rhs = bitcast void* %arg1 to <type>* 6109 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6110 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6111 // Pull out the pointer to the variable. 6112 Address PtrAddr = CGF.EmitLoadOfPointer( 6113 CGF.GetAddrOfLocalVar(&ParamInOut), 6114 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6115 return CGF.Builder.CreateElementBitCast( 6116 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6117 }); 6118 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6119 // Pull out the pointer to the variable. 6120 Address PtrAddr = CGF.EmitLoadOfPointer( 6121 CGF.GetAddrOfLocalVar(&ParamIn), 6122 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6123 return CGF.Builder.CreateElementBitCast( 6124 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6125 }); 6126 PrivateScope.Privatize(); 6127 // Emit the combiner body: 6128 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6129 // store <type> %2, <type>* %lhs 6130 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6131 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6132 cast<DeclRefExpr>(RHS)); 6133 CGF.FinishFunction(); 6134 return Fn; 6135 } 6136 6137 /// Emits reduction finalizer function: 6138 /// \code 6139 /// void @.red_fini(void* %arg) { 6140 /// %0 = bitcast void* %arg to <type>* 6141 /// <destroy>(<type>* %0) 6142 /// ret void 6143 /// } 6144 /// \endcode 6145 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6146 SourceLocation Loc, 6147 ReductionCodeGen &RCG, unsigned N) { 6148 if (!RCG.needCleanups(N)) 6149 return nullptr; 6150 ASTContext &C = CGM.getContext(); 6151 FunctionArgList Args; 6152 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6153 ImplicitParamDecl::Other); 6154 Args.emplace_back(&Param); 6155 const auto &FnInfo = 6156 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6157 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6158 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6159 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6160 Name, &CGM.getModule()); 6161 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6162 Fn->setDoesNotRecurse(); 6163 CodeGenFunction CGF(CGM); 6164 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6165 Address PrivateAddr = CGF.EmitLoadOfPointer( 6166 CGF.GetAddrOfLocalVar(&Param), 6167 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6168 llvm::Value *Size = nullptr; 6169 // If the size of the reduction item is non-constant, load it from global 6170 // threadprivate variable. 6171 if (RCG.getSizes(N).second) { 6172 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6173 CGF, CGM.getContext().getSizeType(), 6174 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6175 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6176 CGM.getContext().getSizeType(), Loc); 6177 } 6178 RCG.emitAggregateType(CGF, N, Size); 6179 // Emit the finalizer body: 6180 // <destroy>(<type>* %0) 6181 RCG.emitCleanups(CGF, N, PrivateAddr); 6182 CGF.FinishFunction(); 6183 return Fn; 6184 } 6185 6186 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6187 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6188 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6189 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6190 return nullptr; 6191 6192 // Build typedef struct: 6193 // kmp_task_red_input { 6194 // void *reduce_shar; // shared reduction item 6195 // size_t reduce_size; // size of data item 6196 // void *reduce_init; // data initialization routine 6197 // void *reduce_fini; // data finalization routine 6198 // void *reduce_comb; // data combiner routine 6199 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6200 // } kmp_task_red_input_t; 6201 ASTContext &C = CGM.getContext(); 6202 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6203 RD->startDefinition(); 6204 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6205 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6206 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6207 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6208 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6209 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6210 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6211 RD->completeDefinition(); 6212 QualType RDType = C.getRecordType(RD); 6213 unsigned Size = Data.ReductionVars.size(); 6214 llvm::APInt ArraySize(/*numBits=*/64, Size); 6215 QualType ArrayRDType = C.getConstantArrayType( 6216 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6217 // kmp_task_red_input_t .rd_input.[Size]; 6218 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6219 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6220 Data.ReductionOps); 6221 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6222 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6223 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6224 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6225 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6226 TaskRedInput.getPointer(), Idxs, 6227 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6228 ".rd_input.gep."); 6229 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6230 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6231 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6232 RCG.emitSharedLValue(CGF, Cnt); 6233 llvm::Value *CastedShared = 6234 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6235 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6236 RCG.emitAggregateType(CGF, Cnt); 6237 llvm::Value *SizeValInChars; 6238 llvm::Value *SizeVal; 6239 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6240 // We use delayed creation/initialization for VLAs, array sections and 6241 // custom reduction initializations. It is required because runtime does not 6242 // provide the way to pass the sizes of VLAs/array sections to 6243 // initializer/combiner/finalizer functions and does not pass the pointer to 6244 // original reduction item to the initializer. Instead threadprivate global 6245 // variables are used to store these values and use them in the functions. 6246 bool DelayedCreation = !!SizeVal; 6247 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6248 /*isSigned=*/false); 6249 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6250 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6251 // ElemLVal.reduce_init = init; 6252 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6253 llvm::Value *InitAddr = 6254 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6255 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6256 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6257 // ElemLVal.reduce_fini = fini; 6258 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6259 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6260 llvm::Value *FiniAddr = Fini 6261 ? CGF.EmitCastToVoidPtr(Fini) 6262 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6263 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6264 // ElemLVal.reduce_comb = comb; 6265 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6266 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6267 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6268 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6269 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6270 // ElemLVal.flags = 0; 6271 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6272 if (DelayedCreation) { 6273 CGF.EmitStoreOfScalar( 6274 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6275 FlagsLVal); 6276 } else 6277 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6278 } 6279 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6280 // *data); 6281 llvm::Value *Args[] = { 6282 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6283 /*isSigned=*/true), 6284 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6285 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6286 CGM.VoidPtrTy)}; 6287 return CGF.EmitRuntimeCall( 6288 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6289 } 6290 6291 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6292 SourceLocation Loc, 6293 ReductionCodeGen &RCG, 6294 unsigned N) { 6295 auto Sizes = RCG.getSizes(N); 6296 // Emit threadprivate global variable if the type is non-constant 6297 // (Sizes.second = nullptr). 6298 if (Sizes.second) { 6299 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6300 /*isSigned=*/false); 6301 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6302 CGF, CGM.getContext().getSizeType(), 6303 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6304 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6305 } 6306 // Store address of the original reduction item if custom initializer is used. 6307 if (RCG.usesReductionInitializer(N)) { 6308 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6309 CGF, CGM.getContext().VoidPtrTy, 6310 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6311 CGF.Builder.CreateStore( 6312 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6313 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6314 SharedAddr, /*IsVolatile=*/false); 6315 } 6316 } 6317 6318 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6319 SourceLocation Loc, 6320 llvm::Value *ReductionsPtr, 6321 LValue SharedLVal) { 6322 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6323 // *d); 6324 llvm::Value *Args[] = { 6325 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6326 /*isSigned=*/true), 6327 ReductionsPtr, 6328 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6329 CGM.VoidPtrTy)}; 6330 return Address( 6331 CGF.EmitRuntimeCall( 6332 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6333 SharedLVal.getAlignment()); 6334 } 6335 6336 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6337 SourceLocation Loc) { 6338 if (!CGF.HaveInsertPoint()) 6339 return; 6340 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6341 // global_tid); 6342 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6343 // Ignore return result until untied tasks are supported. 6344 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6345 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6346 Region->emitUntiedSwitch(CGF); 6347 } 6348 6349 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6350 OpenMPDirectiveKind InnerKind, 6351 const RegionCodeGenTy &CodeGen, 6352 bool HasCancel) { 6353 if (!CGF.HaveInsertPoint()) 6354 return; 6355 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6356 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6357 } 6358 6359 namespace { 6360 enum RTCancelKind { 6361 CancelNoreq = 0, 6362 CancelParallel = 1, 6363 CancelLoop = 2, 6364 CancelSections = 3, 6365 CancelTaskgroup = 4 6366 }; 6367 } // anonymous namespace 6368 6369 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6370 RTCancelKind CancelKind = CancelNoreq; 6371 if (CancelRegion == OMPD_parallel) 6372 CancelKind = CancelParallel; 6373 else if (CancelRegion == OMPD_for) 6374 CancelKind = CancelLoop; 6375 else if (CancelRegion == OMPD_sections) 6376 CancelKind = CancelSections; 6377 else { 6378 assert(CancelRegion == OMPD_taskgroup); 6379 CancelKind = CancelTaskgroup; 6380 } 6381 return CancelKind; 6382 } 6383 6384 void CGOpenMPRuntime::emitCancellationPointCall( 6385 CodeGenFunction &CGF, SourceLocation Loc, 6386 OpenMPDirectiveKind CancelRegion) { 6387 if (!CGF.HaveInsertPoint()) 6388 return; 6389 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6390 // global_tid, kmp_int32 cncl_kind); 6391 if (auto *OMPRegionInfo = 6392 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6393 // For 'cancellation point taskgroup', the task region info may not have a 6394 // cancel. This may instead happen in another adjacent task. 6395 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6396 llvm::Value *Args[] = { 6397 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6398 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6399 // Ignore return result until untied tasks are supported. 6400 llvm::Value *Result = CGF.EmitRuntimeCall( 6401 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6402 // if (__kmpc_cancellationpoint()) { 6403 // exit from construct; 6404 // } 6405 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6406 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6407 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6408 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6409 CGF.EmitBlock(ExitBB); 6410 // exit from construct; 6411 CodeGenFunction::JumpDest CancelDest = 6412 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6413 CGF.EmitBranchThroughCleanup(CancelDest); 6414 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6415 } 6416 } 6417 } 6418 6419 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6420 const Expr *IfCond, 6421 OpenMPDirectiveKind CancelRegion) { 6422 if (!CGF.HaveInsertPoint()) 6423 return; 6424 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6425 // kmp_int32 cncl_kind); 6426 if (auto *OMPRegionInfo = 6427 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6428 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6429 PrePostActionTy &) { 6430 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6431 llvm::Value *Args[] = { 6432 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6433 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6434 // Ignore return result until untied tasks are supported. 6435 llvm::Value *Result = CGF.EmitRuntimeCall( 6436 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6437 // if (__kmpc_cancel()) { 6438 // exit from construct; 6439 // } 6440 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6441 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6442 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6443 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6444 CGF.EmitBlock(ExitBB); 6445 // exit from construct; 6446 CodeGenFunction::JumpDest CancelDest = 6447 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6448 CGF.EmitBranchThroughCleanup(CancelDest); 6449 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6450 }; 6451 if (IfCond) { 6452 emitOMPIfClause(CGF, IfCond, ThenGen, 6453 [](CodeGenFunction &, PrePostActionTy &) {}); 6454 } else { 6455 RegionCodeGenTy ThenRCG(ThenGen); 6456 ThenRCG(CGF); 6457 } 6458 } 6459 } 6460 6461 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6462 const OMPExecutableDirective &D, StringRef ParentName, 6463 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6464 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6465 assert(!ParentName.empty() && "Invalid target region parent name!"); 6466 HasEmittedTargetRegion = true; 6467 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6468 IsOffloadEntry, CodeGen); 6469 } 6470 6471 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6472 const OMPExecutableDirective &D, StringRef ParentName, 6473 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6474 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6475 // Create a unique name for the entry function using the source location 6476 // information of the current target region. The name will be something like: 6477 // 6478 // __omp_offloading_DD_FFFF_PP_lBB 6479 // 6480 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6481 // mangled name of the function that encloses the target region and BB is the 6482 // line number of the target region. 6483 6484 unsigned DeviceID; 6485 unsigned FileID; 6486 unsigned Line; 6487 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6488 Line); 6489 SmallString<64> EntryFnName; 6490 { 6491 llvm::raw_svector_ostream OS(EntryFnName); 6492 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6493 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6494 } 6495 6496 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6497 6498 CodeGenFunction CGF(CGM, true); 6499 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6500 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6501 6502 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6503 6504 // If this target outline function is not an offload entry, we don't need to 6505 // register it. 6506 if (!IsOffloadEntry) 6507 return; 6508 6509 // The target region ID is used by the runtime library to identify the current 6510 // target region, so it only has to be unique and not necessarily point to 6511 // anything. It could be the pointer to the outlined function that implements 6512 // the target region, but we aren't using that so that the compiler doesn't 6513 // need to keep that, and could therefore inline the host function if proven 6514 // worthwhile during optimization. In the other hand, if emitting code for the 6515 // device, the ID has to be the function address so that it can retrieved from 6516 // the offloading entry and launched by the runtime library. We also mark the 6517 // outlined function to have external linkage in case we are emitting code for 6518 // the device, because these functions will be entry points to the device. 6519 6520 if (CGM.getLangOpts().OpenMPIsDevice) { 6521 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6522 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6523 OutlinedFn->setDSOLocal(false); 6524 } else { 6525 std::string Name = getName({EntryFnName, "region_id"}); 6526 OutlinedFnID = new llvm::GlobalVariable( 6527 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6528 llvm::GlobalValue::WeakAnyLinkage, 6529 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6530 } 6531 6532 // Register the information for the entry associated with this target region. 6533 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6534 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6535 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6536 } 6537 6538 /// Checks if the expression is constant or does not have non-trivial function 6539 /// calls. 6540 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6541 // We can skip constant expressions. 6542 // We can skip expressions with trivial calls or simple expressions. 6543 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6544 !E->hasNonTrivialCall(Ctx)) && 6545 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6546 } 6547 6548 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6549 const Stmt *Body) { 6550 const Stmt *Child = Body->IgnoreContainers(); 6551 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6552 Child = nullptr; 6553 for (const Stmt *S : C->body()) { 6554 if (const auto *E = dyn_cast<Expr>(S)) { 6555 if (isTrivial(Ctx, E)) 6556 continue; 6557 } 6558 // Some of the statements can be ignored. 6559 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6560 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6561 continue; 6562 // Analyze declarations. 6563 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6564 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6565 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6566 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6567 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6568 isa<UsingDirectiveDecl>(D) || 6569 isa<OMPDeclareReductionDecl>(D) || 6570 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6571 return true; 6572 const auto *VD = dyn_cast<VarDecl>(D); 6573 if (!VD) 6574 return false; 6575 return VD->isConstexpr() || 6576 ((VD->getType().isTrivialType(Ctx) || 6577 VD->getType()->isReferenceType()) && 6578 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6579 })) 6580 continue; 6581 } 6582 // Found multiple children - cannot get the one child only. 6583 if (Child) 6584 return nullptr; 6585 Child = S; 6586 } 6587 if (Child) 6588 Child = Child->IgnoreContainers(); 6589 } 6590 return Child; 6591 } 6592 6593 /// Emit the number of teams for a target directive. Inspect the num_teams 6594 /// clause associated with a teams construct combined or closely nested 6595 /// with the target directive. 6596 /// 6597 /// Emit a team of size one for directives such as 'target parallel' that 6598 /// have no associated teams construct. 6599 /// 6600 /// Otherwise, return nullptr. 6601 static llvm::Value * 6602 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6603 const OMPExecutableDirective &D) { 6604 assert(!CGF.getLangOpts().OpenMPIsDevice && 6605 "Clauses associated with the teams directive expected to be emitted " 6606 "only for the host!"); 6607 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6608 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6609 "Expected target-based executable directive."); 6610 CGBuilderTy &Bld = CGF.Builder; 6611 switch (DirectiveKind) { 6612 case OMPD_target: { 6613 const auto *CS = D.getInnermostCapturedStmt(); 6614 const auto *Body = 6615 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6616 const Stmt *ChildStmt = 6617 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6618 if (const auto *NestedDir = 6619 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6620 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6621 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6622 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6623 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6624 const Expr *NumTeams = 6625 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6626 llvm::Value *NumTeamsVal = 6627 CGF.EmitScalarExpr(NumTeams, 6628 /*IgnoreResultAssign*/ true); 6629 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6630 /*isSigned=*/true); 6631 } 6632 return Bld.getInt32(0); 6633 } 6634 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6635 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6636 return Bld.getInt32(1); 6637 return Bld.getInt32(0); 6638 } 6639 return nullptr; 6640 } 6641 case OMPD_target_teams: 6642 case OMPD_target_teams_distribute: 6643 case OMPD_target_teams_distribute_simd: 6644 case OMPD_target_teams_distribute_parallel_for: 6645 case OMPD_target_teams_distribute_parallel_for_simd: { 6646 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6647 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6648 const Expr *NumTeams = 6649 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6650 llvm::Value *NumTeamsVal = 6651 CGF.EmitScalarExpr(NumTeams, 6652 /*IgnoreResultAssign*/ true); 6653 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6654 /*isSigned=*/true); 6655 } 6656 return Bld.getInt32(0); 6657 } 6658 case OMPD_target_parallel: 6659 case OMPD_target_parallel_for: 6660 case OMPD_target_parallel_for_simd: 6661 case OMPD_target_simd: 6662 return Bld.getInt32(1); 6663 case OMPD_parallel: 6664 case OMPD_for: 6665 case OMPD_parallel_for: 6666 case OMPD_parallel_sections: 6667 case OMPD_for_simd: 6668 case OMPD_parallel_for_simd: 6669 case OMPD_cancel: 6670 case OMPD_cancellation_point: 6671 case OMPD_ordered: 6672 case OMPD_threadprivate: 6673 case OMPD_allocate: 6674 case OMPD_task: 6675 case OMPD_simd: 6676 case OMPD_sections: 6677 case OMPD_section: 6678 case OMPD_single: 6679 case OMPD_master: 6680 case OMPD_critical: 6681 case OMPD_taskyield: 6682 case OMPD_barrier: 6683 case OMPD_taskwait: 6684 case OMPD_taskgroup: 6685 case OMPD_atomic: 6686 case OMPD_flush: 6687 case OMPD_teams: 6688 case OMPD_target_data: 6689 case OMPD_target_exit_data: 6690 case OMPD_target_enter_data: 6691 case OMPD_distribute: 6692 case OMPD_distribute_simd: 6693 case OMPD_distribute_parallel_for: 6694 case OMPD_distribute_parallel_for_simd: 6695 case OMPD_teams_distribute: 6696 case OMPD_teams_distribute_simd: 6697 case OMPD_teams_distribute_parallel_for: 6698 case OMPD_teams_distribute_parallel_for_simd: 6699 case OMPD_target_update: 6700 case OMPD_declare_simd: 6701 case OMPD_declare_variant: 6702 case OMPD_declare_target: 6703 case OMPD_end_declare_target: 6704 case OMPD_declare_reduction: 6705 case OMPD_declare_mapper: 6706 case OMPD_taskloop: 6707 case OMPD_taskloop_simd: 6708 case OMPD_master_taskloop: 6709 case OMPD_master_taskloop_simd: 6710 case OMPD_parallel_master_taskloop: 6711 case OMPD_requires: 6712 case OMPD_unknown: 6713 break; 6714 } 6715 llvm_unreachable("Unexpected directive kind."); 6716 } 6717 6718 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6719 llvm::Value *DefaultThreadLimitVal) { 6720 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6721 CGF.getContext(), CS->getCapturedStmt()); 6722 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6723 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6724 llvm::Value *NumThreads = nullptr; 6725 llvm::Value *CondVal = nullptr; 6726 // Handle if clause. If if clause present, the number of threads is 6727 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6728 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6729 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6730 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6731 const OMPIfClause *IfClause = nullptr; 6732 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6733 if (C->getNameModifier() == OMPD_unknown || 6734 C->getNameModifier() == OMPD_parallel) { 6735 IfClause = C; 6736 break; 6737 } 6738 } 6739 if (IfClause) { 6740 const Expr *Cond = IfClause->getCondition(); 6741 bool Result; 6742 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6743 if (!Result) 6744 return CGF.Builder.getInt32(1); 6745 } else { 6746 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6747 if (const auto *PreInit = 6748 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6749 for (const auto *I : PreInit->decls()) { 6750 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6751 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6752 } else { 6753 CodeGenFunction::AutoVarEmission Emission = 6754 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6755 CGF.EmitAutoVarCleanups(Emission); 6756 } 6757 } 6758 } 6759 CondVal = CGF.EvaluateExprAsBool(Cond); 6760 } 6761 } 6762 } 6763 // Check the value of num_threads clause iff if clause was not specified 6764 // or is not evaluated to false. 6765 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6766 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6767 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6768 const auto *NumThreadsClause = 6769 Dir->getSingleClause<OMPNumThreadsClause>(); 6770 CodeGenFunction::LexicalScope Scope( 6771 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6772 if (const auto *PreInit = 6773 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6774 for (const auto *I : PreInit->decls()) { 6775 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6776 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6777 } else { 6778 CodeGenFunction::AutoVarEmission Emission = 6779 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6780 CGF.EmitAutoVarCleanups(Emission); 6781 } 6782 } 6783 } 6784 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6785 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6786 /*isSigned=*/false); 6787 if (DefaultThreadLimitVal) 6788 NumThreads = CGF.Builder.CreateSelect( 6789 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6790 DefaultThreadLimitVal, NumThreads); 6791 } else { 6792 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6793 : CGF.Builder.getInt32(0); 6794 } 6795 // Process condition of the if clause. 6796 if (CondVal) { 6797 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6798 CGF.Builder.getInt32(1)); 6799 } 6800 return NumThreads; 6801 } 6802 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6803 return CGF.Builder.getInt32(1); 6804 return DefaultThreadLimitVal; 6805 } 6806 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6807 : CGF.Builder.getInt32(0); 6808 } 6809 6810 /// Emit the number of threads for a target directive. Inspect the 6811 /// thread_limit clause associated with a teams construct combined or closely 6812 /// nested with the target directive. 6813 /// 6814 /// Emit the num_threads clause for directives such as 'target parallel' that 6815 /// have no associated teams construct. 6816 /// 6817 /// Otherwise, return nullptr. 6818 static llvm::Value * 6819 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6820 const OMPExecutableDirective &D) { 6821 assert(!CGF.getLangOpts().OpenMPIsDevice && 6822 "Clauses associated with the teams directive expected to be emitted " 6823 "only for the host!"); 6824 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6825 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6826 "Expected target-based executable directive."); 6827 CGBuilderTy &Bld = CGF.Builder; 6828 llvm::Value *ThreadLimitVal = nullptr; 6829 llvm::Value *NumThreadsVal = nullptr; 6830 switch (DirectiveKind) { 6831 case OMPD_target: { 6832 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6833 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6834 return NumThreads; 6835 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6836 CGF.getContext(), CS->getCapturedStmt()); 6837 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6838 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6839 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6840 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6841 const auto *ThreadLimitClause = 6842 Dir->getSingleClause<OMPThreadLimitClause>(); 6843 CodeGenFunction::LexicalScope Scope( 6844 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6845 if (const auto *PreInit = 6846 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6847 for (const auto *I : PreInit->decls()) { 6848 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6849 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6850 } else { 6851 CodeGenFunction::AutoVarEmission Emission = 6852 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6853 CGF.EmitAutoVarCleanups(Emission); 6854 } 6855 } 6856 } 6857 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6858 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6859 ThreadLimitVal = 6860 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6861 } 6862 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6863 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6864 CS = Dir->getInnermostCapturedStmt(); 6865 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6866 CGF.getContext(), CS->getCapturedStmt()); 6867 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6868 } 6869 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6870 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6871 CS = Dir->getInnermostCapturedStmt(); 6872 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6873 return NumThreads; 6874 } 6875 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6876 return Bld.getInt32(1); 6877 } 6878 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6879 } 6880 case OMPD_target_teams: { 6881 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6882 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6883 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6884 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6885 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6886 ThreadLimitVal = 6887 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6888 } 6889 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6890 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6891 return NumThreads; 6892 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6893 CGF.getContext(), CS->getCapturedStmt()); 6894 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6895 if (Dir->getDirectiveKind() == OMPD_distribute) { 6896 CS = Dir->getInnermostCapturedStmt(); 6897 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6898 return NumThreads; 6899 } 6900 } 6901 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6902 } 6903 case OMPD_target_teams_distribute: 6904 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6905 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6906 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6907 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6908 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6909 ThreadLimitVal = 6910 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6911 } 6912 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6913 case OMPD_target_parallel: 6914 case OMPD_target_parallel_for: 6915 case OMPD_target_parallel_for_simd: 6916 case OMPD_target_teams_distribute_parallel_for: 6917 case OMPD_target_teams_distribute_parallel_for_simd: { 6918 llvm::Value *CondVal = nullptr; 6919 // Handle if clause. If if clause present, the number of threads is 6920 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6921 if (D.hasClausesOfKind<OMPIfClause>()) { 6922 const OMPIfClause *IfClause = nullptr; 6923 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6924 if (C->getNameModifier() == OMPD_unknown || 6925 C->getNameModifier() == OMPD_parallel) { 6926 IfClause = C; 6927 break; 6928 } 6929 } 6930 if (IfClause) { 6931 const Expr *Cond = IfClause->getCondition(); 6932 bool Result; 6933 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6934 if (!Result) 6935 return Bld.getInt32(1); 6936 } else { 6937 CodeGenFunction::RunCleanupsScope Scope(CGF); 6938 CondVal = CGF.EvaluateExprAsBool(Cond); 6939 } 6940 } 6941 } 6942 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6943 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6944 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6945 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6946 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6947 ThreadLimitVal = 6948 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6949 } 6950 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6951 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6952 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6953 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6954 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6955 NumThreadsVal = 6956 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6957 ThreadLimitVal = ThreadLimitVal 6958 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6959 ThreadLimitVal), 6960 NumThreadsVal, ThreadLimitVal) 6961 : NumThreadsVal; 6962 } 6963 if (!ThreadLimitVal) 6964 ThreadLimitVal = Bld.getInt32(0); 6965 if (CondVal) 6966 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6967 return ThreadLimitVal; 6968 } 6969 case OMPD_target_teams_distribute_simd: 6970 case OMPD_target_simd: 6971 return Bld.getInt32(1); 6972 case OMPD_parallel: 6973 case OMPD_for: 6974 case OMPD_parallel_for: 6975 case OMPD_parallel_sections: 6976 case OMPD_for_simd: 6977 case OMPD_parallel_for_simd: 6978 case OMPD_cancel: 6979 case OMPD_cancellation_point: 6980 case OMPD_ordered: 6981 case OMPD_threadprivate: 6982 case OMPD_allocate: 6983 case OMPD_task: 6984 case OMPD_simd: 6985 case OMPD_sections: 6986 case OMPD_section: 6987 case OMPD_single: 6988 case OMPD_master: 6989 case OMPD_critical: 6990 case OMPD_taskyield: 6991 case OMPD_barrier: 6992 case OMPD_taskwait: 6993 case OMPD_taskgroup: 6994 case OMPD_atomic: 6995 case OMPD_flush: 6996 case OMPD_teams: 6997 case OMPD_target_data: 6998 case OMPD_target_exit_data: 6999 case OMPD_target_enter_data: 7000 case OMPD_distribute: 7001 case OMPD_distribute_simd: 7002 case OMPD_distribute_parallel_for: 7003 case OMPD_distribute_parallel_for_simd: 7004 case OMPD_teams_distribute: 7005 case OMPD_teams_distribute_simd: 7006 case OMPD_teams_distribute_parallel_for: 7007 case OMPD_teams_distribute_parallel_for_simd: 7008 case OMPD_target_update: 7009 case OMPD_declare_simd: 7010 case OMPD_declare_variant: 7011 case OMPD_declare_target: 7012 case OMPD_end_declare_target: 7013 case OMPD_declare_reduction: 7014 case OMPD_declare_mapper: 7015 case OMPD_taskloop: 7016 case OMPD_taskloop_simd: 7017 case OMPD_master_taskloop: 7018 case OMPD_master_taskloop_simd: 7019 case OMPD_parallel_master_taskloop: 7020 case OMPD_requires: 7021 case OMPD_unknown: 7022 break; 7023 } 7024 llvm_unreachable("Unsupported directive kind."); 7025 } 7026 7027 namespace { 7028 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7029 7030 // Utility to handle information from clauses associated with a given 7031 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7032 // It provides a convenient interface to obtain the information and generate 7033 // code for that information. 7034 class MappableExprsHandler { 7035 public: 7036 /// Values for bit flags used to specify the mapping type for 7037 /// offloading. 7038 enum OpenMPOffloadMappingFlags : uint64_t { 7039 /// No flags 7040 OMP_MAP_NONE = 0x0, 7041 /// Allocate memory on the device and move data from host to device. 7042 OMP_MAP_TO = 0x01, 7043 /// Allocate memory on the device and move data from device to host. 7044 OMP_MAP_FROM = 0x02, 7045 /// Always perform the requested mapping action on the element, even 7046 /// if it was already mapped before. 7047 OMP_MAP_ALWAYS = 0x04, 7048 /// Delete the element from the device environment, ignoring the 7049 /// current reference count associated with the element. 7050 OMP_MAP_DELETE = 0x08, 7051 /// The element being mapped is a pointer-pointee pair; both the 7052 /// pointer and the pointee should be mapped. 7053 OMP_MAP_PTR_AND_OBJ = 0x10, 7054 /// This flags signals that the base address of an entry should be 7055 /// passed to the target kernel as an argument. 7056 OMP_MAP_TARGET_PARAM = 0x20, 7057 /// Signal that the runtime library has to return the device pointer 7058 /// in the current position for the data being mapped. Used when we have the 7059 /// use_device_ptr clause. 7060 OMP_MAP_RETURN_PARAM = 0x40, 7061 /// This flag signals that the reference being passed is a pointer to 7062 /// private data. 7063 OMP_MAP_PRIVATE = 0x80, 7064 /// Pass the element to the device by value. 7065 OMP_MAP_LITERAL = 0x100, 7066 /// Implicit map 7067 OMP_MAP_IMPLICIT = 0x200, 7068 /// Close is a hint to the runtime to allocate memory close to 7069 /// the target device. 7070 OMP_MAP_CLOSE = 0x400, 7071 /// The 16 MSBs of the flags indicate whether the entry is member of some 7072 /// struct/class. 7073 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7074 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7075 }; 7076 7077 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7078 static unsigned getFlagMemberOffset() { 7079 unsigned Offset = 0; 7080 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7081 Remain = Remain >> 1) 7082 Offset++; 7083 return Offset; 7084 } 7085 7086 /// Class that associates information with a base pointer to be passed to the 7087 /// runtime library. 7088 class BasePointerInfo { 7089 /// The base pointer. 7090 llvm::Value *Ptr = nullptr; 7091 /// The base declaration that refers to this device pointer, or null if 7092 /// there is none. 7093 const ValueDecl *DevPtrDecl = nullptr; 7094 7095 public: 7096 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7097 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7098 llvm::Value *operator*() const { return Ptr; } 7099 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7100 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7101 }; 7102 7103 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7104 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7105 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7106 7107 /// Map between a struct and the its lowest & highest elements which have been 7108 /// mapped. 7109 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7110 /// HE(FieldIndex, Pointer)} 7111 struct StructRangeInfoTy { 7112 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7113 0, Address::invalid()}; 7114 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7115 0, Address::invalid()}; 7116 Address Base = Address::invalid(); 7117 }; 7118 7119 private: 7120 /// Kind that defines how a device pointer has to be returned. 7121 struct MapInfo { 7122 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7123 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7124 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7125 bool ReturnDevicePointer = false; 7126 bool IsImplicit = false; 7127 7128 MapInfo() = default; 7129 MapInfo( 7130 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7131 OpenMPMapClauseKind MapType, 7132 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7133 bool ReturnDevicePointer, bool IsImplicit) 7134 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7135 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7136 }; 7137 7138 /// If use_device_ptr is used on a pointer which is a struct member and there 7139 /// is no map information about it, then emission of that entry is deferred 7140 /// until the whole struct has been processed. 7141 struct DeferredDevicePtrEntryTy { 7142 const Expr *IE = nullptr; 7143 const ValueDecl *VD = nullptr; 7144 7145 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7146 : IE(IE), VD(VD) {} 7147 }; 7148 7149 /// The target directive from where the mappable clauses were extracted. It 7150 /// is either a executable directive or a user-defined mapper directive. 7151 llvm::PointerUnion<const OMPExecutableDirective *, 7152 const OMPDeclareMapperDecl *> 7153 CurDir; 7154 7155 /// Function the directive is being generated for. 7156 CodeGenFunction &CGF; 7157 7158 /// Set of all first private variables in the current directive. 7159 /// bool data is set to true if the variable is implicitly marked as 7160 /// firstprivate, false otherwise. 7161 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7162 7163 /// Map between device pointer declarations and their expression components. 7164 /// The key value for declarations in 'this' is null. 7165 llvm::DenseMap< 7166 const ValueDecl *, 7167 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7168 DevPointersMap; 7169 7170 llvm::Value *getExprTypeSize(const Expr *E) const { 7171 QualType ExprTy = E->getType().getCanonicalType(); 7172 7173 // Reference types are ignored for mapping purposes. 7174 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7175 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7176 7177 // Given that an array section is considered a built-in type, we need to 7178 // do the calculation based on the length of the section instead of relying 7179 // on CGF.getTypeSize(E->getType()). 7180 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7181 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7182 OAE->getBase()->IgnoreParenImpCasts()) 7183 .getCanonicalType(); 7184 7185 // If there is no length associated with the expression and lower bound is 7186 // not specified too, that means we are using the whole length of the 7187 // base. 7188 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7189 !OAE->getLowerBound()) 7190 return CGF.getTypeSize(BaseTy); 7191 7192 llvm::Value *ElemSize; 7193 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7194 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7195 } else { 7196 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7197 assert(ATy && "Expecting array type if not a pointer type."); 7198 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7199 } 7200 7201 // If we don't have a length at this point, that is because we have an 7202 // array section with a single element. 7203 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7204 return ElemSize; 7205 7206 if (const Expr *LenExpr = OAE->getLength()) { 7207 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7208 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7209 CGF.getContext().getSizeType(), 7210 LenExpr->getExprLoc()); 7211 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7212 } 7213 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7214 OAE->getLowerBound() && "expected array_section[lb:]."); 7215 // Size = sizetype - lb * elemtype; 7216 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7217 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7218 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7219 CGF.getContext().getSizeType(), 7220 OAE->getLowerBound()->getExprLoc()); 7221 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7222 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7223 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7224 LengthVal = CGF.Builder.CreateSelect( 7225 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7226 return LengthVal; 7227 } 7228 return CGF.getTypeSize(ExprTy); 7229 } 7230 7231 /// Return the corresponding bits for a given map clause modifier. Add 7232 /// a flag marking the map as a pointer if requested. Add a flag marking the 7233 /// map as the first one of a series of maps that relate to the same map 7234 /// expression. 7235 OpenMPOffloadMappingFlags getMapTypeBits( 7236 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7237 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7238 OpenMPOffloadMappingFlags Bits = 7239 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7240 switch (MapType) { 7241 case OMPC_MAP_alloc: 7242 case OMPC_MAP_release: 7243 // alloc and release is the default behavior in the runtime library, i.e. 7244 // if we don't pass any bits alloc/release that is what the runtime is 7245 // going to do. Therefore, we don't need to signal anything for these two 7246 // type modifiers. 7247 break; 7248 case OMPC_MAP_to: 7249 Bits |= OMP_MAP_TO; 7250 break; 7251 case OMPC_MAP_from: 7252 Bits |= OMP_MAP_FROM; 7253 break; 7254 case OMPC_MAP_tofrom: 7255 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7256 break; 7257 case OMPC_MAP_delete: 7258 Bits |= OMP_MAP_DELETE; 7259 break; 7260 case OMPC_MAP_unknown: 7261 llvm_unreachable("Unexpected map type!"); 7262 } 7263 if (AddPtrFlag) 7264 Bits |= OMP_MAP_PTR_AND_OBJ; 7265 if (AddIsTargetParamFlag) 7266 Bits |= OMP_MAP_TARGET_PARAM; 7267 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7268 != MapModifiers.end()) 7269 Bits |= OMP_MAP_ALWAYS; 7270 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7271 != MapModifiers.end()) 7272 Bits |= OMP_MAP_CLOSE; 7273 return Bits; 7274 } 7275 7276 /// Return true if the provided expression is a final array section. A 7277 /// final array section, is one whose length can't be proved to be one. 7278 bool isFinalArraySectionExpression(const Expr *E) const { 7279 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7280 7281 // It is not an array section and therefore not a unity-size one. 7282 if (!OASE) 7283 return false; 7284 7285 // An array section with no colon always refer to a single element. 7286 if (OASE->getColonLoc().isInvalid()) 7287 return false; 7288 7289 const Expr *Length = OASE->getLength(); 7290 7291 // If we don't have a length we have to check if the array has size 1 7292 // for this dimension. Also, we should always expect a length if the 7293 // base type is pointer. 7294 if (!Length) { 7295 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7296 OASE->getBase()->IgnoreParenImpCasts()) 7297 .getCanonicalType(); 7298 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7299 return ATy->getSize().getSExtValue() != 1; 7300 // If we don't have a constant dimension length, we have to consider 7301 // the current section as having any size, so it is not necessarily 7302 // unitary. If it happen to be unity size, that's user fault. 7303 return true; 7304 } 7305 7306 // Check if the length evaluates to 1. 7307 Expr::EvalResult Result; 7308 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7309 return true; // Can have more that size 1. 7310 7311 llvm::APSInt ConstLength = Result.Val.getInt(); 7312 return ConstLength.getSExtValue() != 1; 7313 } 7314 7315 /// Generate the base pointers, section pointers, sizes and map type 7316 /// bits for the provided map type, map modifier, and expression components. 7317 /// \a IsFirstComponent should be set to true if the provided set of 7318 /// components is the first associated with a capture. 7319 void generateInfoForComponentList( 7320 OpenMPMapClauseKind MapType, 7321 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7322 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7323 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7324 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7325 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7326 bool IsImplicit, 7327 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7328 OverlappedElements = llvm::None) const { 7329 // The following summarizes what has to be generated for each map and the 7330 // types below. The generated information is expressed in this order: 7331 // base pointer, section pointer, size, flags 7332 // (to add to the ones that come from the map type and modifier). 7333 // 7334 // double d; 7335 // int i[100]; 7336 // float *p; 7337 // 7338 // struct S1 { 7339 // int i; 7340 // float f[50]; 7341 // } 7342 // struct S2 { 7343 // int i; 7344 // float f[50]; 7345 // S1 s; 7346 // double *p; 7347 // struct S2 *ps; 7348 // } 7349 // S2 s; 7350 // S2 *ps; 7351 // 7352 // map(d) 7353 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7354 // 7355 // map(i) 7356 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7357 // 7358 // map(i[1:23]) 7359 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7360 // 7361 // map(p) 7362 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7363 // 7364 // map(p[1:24]) 7365 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7366 // 7367 // map(s) 7368 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7369 // 7370 // map(s.i) 7371 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7372 // 7373 // map(s.s.f) 7374 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7375 // 7376 // map(s.p) 7377 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7378 // 7379 // map(to: s.p[:22]) 7380 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7381 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7382 // &(s.p), &(s.p[0]), 22*sizeof(double), 7383 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7384 // (*) alloc space for struct members, only this is a target parameter 7385 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7386 // optimizes this entry out, same in the examples below) 7387 // (***) map the pointee (map: to) 7388 // 7389 // map(s.ps) 7390 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7391 // 7392 // map(from: s.ps->s.i) 7393 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7394 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7395 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7396 // 7397 // map(to: s.ps->ps) 7398 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7399 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7400 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7401 // 7402 // map(s.ps->ps->ps) 7403 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7404 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7405 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7406 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7407 // 7408 // map(to: s.ps->ps->s.f[:22]) 7409 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7410 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7411 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7412 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7413 // 7414 // map(ps) 7415 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7416 // 7417 // map(ps->i) 7418 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7419 // 7420 // map(ps->s.f) 7421 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7422 // 7423 // map(from: ps->p) 7424 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7425 // 7426 // map(to: ps->p[:22]) 7427 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7428 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7429 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7430 // 7431 // map(ps->ps) 7432 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7433 // 7434 // map(from: ps->ps->s.i) 7435 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7436 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7437 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7438 // 7439 // map(from: ps->ps->ps) 7440 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7441 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7442 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7443 // 7444 // map(ps->ps->ps->ps) 7445 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7446 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7447 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7448 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7449 // 7450 // map(to: ps->ps->ps->s.f[:22]) 7451 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7452 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7453 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7454 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7455 // 7456 // map(to: s.f[:22]) map(from: s.p[:33]) 7457 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7458 // sizeof(double*) (**), TARGET_PARAM 7459 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7460 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7461 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7462 // (*) allocate contiguous space needed to fit all mapped members even if 7463 // we allocate space for members not mapped (in this example, 7464 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7465 // them as well because they fall between &s.f[0] and &s.p) 7466 // 7467 // map(from: s.f[:22]) map(to: ps->p[:33]) 7468 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7469 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7470 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7471 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7472 // (*) the struct this entry pertains to is the 2nd element in the list of 7473 // arguments, hence MEMBER_OF(2) 7474 // 7475 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7476 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7477 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7478 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7479 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7480 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7481 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7482 // (*) the struct this entry pertains to is the 4th element in the list 7483 // of arguments, hence MEMBER_OF(4) 7484 7485 // Track if the map information being generated is the first for a capture. 7486 bool IsCaptureFirstInfo = IsFirstComponentList; 7487 // When the variable is on a declare target link or in a to clause with 7488 // unified memory, a reference is needed to hold the host/device address 7489 // of the variable. 7490 bool RequiresReference = false; 7491 7492 // Scan the components from the base to the complete expression. 7493 auto CI = Components.rbegin(); 7494 auto CE = Components.rend(); 7495 auto I = CI; 7496 7497 // Track if the map information being generated is the first for a list of 7498 // components. 7499 bool IsExpressionFirstInfo = true; 7500 Address BP = Address::invalid(); 7501 const Expr *AssocExpr = I->getAssociatedExpression(); 7502 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7503 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7504 7505 if (isa<MemberExpr>(AssocExpr)) { 7506 // The base is the 'this' pointer. The content of the pointer is going 7507 // to be the base of the field being mapped. 7508 BP = CGF.LoadCXXThisAddress(); 7509 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7510 (OASE && 7511 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7512 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7513 } else { 7514 // The base is the reference to the variable. 7515 // BP = &Var. 7516 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7517 if (const auto *VD = 7518 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7519 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7520 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7521 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7522 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7523 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7524 RequiresReference = true; 7525 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7526 } 7527 } 7528 } 7529 7530 // If the variable is a pointer and is being dereferenced (i.e. is not 7531 // the last component), the base has to be the pointer itself, not its 7532 // reference. References are ignored for mapping purposes. 7533 QualType Ty = 7534 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7535 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7536 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7537 7538 // We do not need to generate individual map information for the 7539 // pointer, it can be associated with the combined storage. 7540 ++I; 7541 } 7542 } 7543 7544 // Track whether a component of the list should be marked as MEMBER_OF some 7545 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7546 // in a component list should be marked as MEMBER_OF, all subsequent entries 7547 // do not belong to the base struct. E.g. 7548 // struct S2 s; 7549 // s.ps->ps->ps->f[:] 7550 // (1) (2) (3) (4) 7551 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7552 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7553 // is the pointee of ps(2) which is not member of struct s, so it should not 7554 // be marked as such (it is still PTR_AND_OBJ). 7555 // The variable is initialized to false so that PTR_AND_OBJ entries which 7556 // are not struct members are not considered (e.g. array of pointers to 7557 // data). 7558 bool ShouldBeMemberOf = false; 7559 7560 // Variable keeping track of whether or not we have encountered a component 7561 // in the component list which is a member expression. Useful when we have a 7562 // pointer or a final array section, in which case it is the previous 7563 // component in the list which tells us whether we have a member expression. 7564 // E.g. X.f[:] 7565 // While processing the final array section "[:]" it is "f" which tells us 7566 // whether we are dealing with a member of a declared struct. 7567 const MemberExpr *EncounteredME = nullptr; 7568 7569 for (; I != CE; ++I) { 7570 // If the current component is member of a struct (parent struct) mark it. 7571 if (!EncounteredME) { 7572 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7573 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7574 // as MEMBER_OF the parent struct. 7575 if (EncounteredME) 7576 ShouldBeMemberOf = true; 7577 } 7578 7579 auto Next = std::next(I); 7580 7581 // We need to generate the addresses and sizes if this is the last 7582 // component, if the component is a pointer or if it is an array section 7583 // whose length can't be proved to be one. If this is a pointer, it 7584 // becomes the base address for the following components. 7585 7586 // A final array section, is one whose length can't be proved to be one. 7587 bool IsFinalArraySection = 7588 isFinalArraySectionExpression(I->getAssociatedExpression()); 7589 7590 // Get information on whether the element is a pointer. Have to do a 7591 // special treatment for array sections given that they are built-in 7592 // types. 7593 const auto *OASE = 7594 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7595 bool IsPointer = 7596 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7597 .getCanonicalType() 7598 ->isAnyPointerType()) || 7599 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7600 7601 if (Next == CE || IsPointer || IsFinalArraySection) { 7602 // If this is not the last component, we expect the pointer to be 7603 // associated with an array expression or member expression. 7604 assert((Next == CE || 7605 isa<MemberExpr>(Next->getAssociatedExpression()) || 7606 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7607 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7608 "Unexpected expression"); 7609 7610 Address LB = 7611 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7612 7613 // If this component is a pointer inside the base struct then we don't 7614 // need to create any entry for it - it will be combined with the object 7615 // it is pointing to into a single PTR_AND_OBJ entry. 7616 bool IsMemberPointer = 7617 IsPointer && EncounteredME && 7618 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7619 EncounteredME); 7620 if (!OverlappedElements.empty()) { 7621 // Handle base element with the info for overlapped elements. 7622 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7623 assert(Next == CE && 7624 "Expected last element for the overlapped elements."); 7625 assert(!IsPointer && 7626 "Unexpected base element with the pointer type."); 7627 // Mark the whole struct as the struct that requires allocation on the 7628 // device. 7629 PartialStruct.LowestElem = {0, LB}; 7630 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7631 I->getAssociatedExpression()->getType()); 7632 Address HB = CGF.Builder.CreateConstGEP( 7633 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7634 CGF.VoidPtrTy), 7635 TypeSize.getQuantity() - 1); 7636 PartialStruct.HighestElem = { 7637 std::numeric_limits<decltype( 7638 PartialStruct.HighestElem.first)>::max(), 7639 HB}; 7640 PartialStruct.Base = BP; 7641 // Emit data for non-overlapped data. 7642 OpenMPOffloadMappingFlags Flags = 7643 OMP_MAP_MEMBER_OF | 7644 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7645 /*AddPtrFlag=*/false, 7646 /*AddIsTargetParamFlag=*/false); 7647 LB = BP; 7648 llvm::Value *Size = nullptr; 7649 // Do bitcopy of all non-overlapped structure elements. 7650 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7651 Component : OverlappedElements) { 7652 Address ComponentLB = Address::invalid(); 7653 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7654 Component) { 7655 if (MC.getAssociatedDeclaration()) { 7656 ComponentLB = 7657 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7658 .getAddress(); 7659 Size = CGF.Builder.CreatePtrDiff( 7660 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7661 CGF.EmitCastToVoidPtr(LB.getPointer())); 7662 break; 7663 } 7664 } 7665 BasePointers.push_back(BP.getPointer()); 7666 Pointers.push_back(LB.getPointer()); 7667 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7668 /*isSigned=*/true)); 7669 Types.push_back(Flags); 7670 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7671 } 7672 BasePointers.push_back(BP.getPointer()); 7673 Pointers.push_back(LB.getPointer()); 7674 Size = CGF.Builder.CreatePtrDiff( 7675 CGF.EmitCastToVoidPtr( 7676 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7677 CGF.EmitCastToVoidPtr(LB.getPointer())); 7678 Sizes.push_back( 7679 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7680 Types.push_back(Flags); 7681 break; 7682 } 7683 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7684 if (!IsMemberPointer) { 7685 BasePointers.push_back(BP.getPointer()); 7686 Pointers.push_back(LB.getPointer()); 7687 Sizes.push_back( 7688 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7689 7690 // We need to add a pointer flag for each map that comes from the 7691 // same expression except for the first one. We also need to signal 7692 // this map is the first one that relates with the current capture 7693 // (there is a set of entries for each capture). 7694 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7695 MapType, MapModifiers, IsImplicit, 7696 !IsExpressionFirstInfo || RequiresReference, 7697 IsCaptureFirstInfo && !RequiresReference); 7698 7699 if (!IsExpressionFirstInfo) { 7700 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7701 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7702 if (IsPointer) 7703 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7704 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7705 7706 if (ShouldBeMemberOf) { 7707 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7708 // should be later updated with the correct value of MEMBER_OF. 7709 Flags |= OMP_MAP_MEMBER_OF; 7710 // From now on, all subsequent PTR_AND_OBJ entries should not be 7711 // marked as MEMBER_OF. 7712 ShouldBeMemberOf = false; 7713 } 7714 } 7715 7716 Types.push_back(Flags); 7717 } 7718 7719 // If we have encountered a member expression so far, keep track of the 7720 // mapped member. If the parent is "*this", then the value declaration 7721 // is nullptr. 7722 if (EncounteredME) { 7723 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7724 unsigned FieldIndex = FD->getFieldIndex(); 7725 7726 // Update info about the lowest and highest elements for this struct 7727 if (!PartialStruct.Base.isValid()) { 7728 PartialStruct.LowestElem = {FieldIndex, LB}; 7729 PartialStruct.HighestElem = {FieldIndex, LB}; 7730 PartialStruct.Base = BP; 7731 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7732 PartialStruct.LowestElem = {FieldIndex, LB}; 7733 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7734 PartialStruct.HighestElem = {FieldIndex, LB}; 7735 } 7736 } 7737 7738 // If we have a final array section, we are done with this expression. 7739 if (IsFinalArraySection) 7740 break; 7741 7742 // The pointer becomes the base for the next element. 7743 if (Next != CE) 7744 BP = LB; 7745 7746 IsExpressionFirstInfo = false; 7747 IsCaptureFirstInfo = false; 7748 } 7749 } 7750 } 7751 7752 /// Return the adjusted map modifiers if the declaration a capture refers to 7753 /// appears in a first-private clause. This is expected to be used only with 7754 /// directives that start with 'target'. 7755 MappableExprsHandler::OpenMPOffloadMappingFlags 7756 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7757 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7758 7759 // A first private variable captured by reference will use only the 7760 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7761 // declaration is known as first-private in this handler. 7762 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7763 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7764 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7765 return MappableExprsHandler::OMP_MAP_ALWAYS | 7766 MappableExprsHandler::OMP_MAP_TO; 7767 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7768 return MappableExprsHandler::OMP_MAP_TO | 7769 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7770 return MappableExprsHandler::OMP_MAP_PRIVATE | 7771 MappableExprsHandler::OMP_MAP_TO; 7772 } 7773 return MappableExprsHandler::OMP_MAP_TO | 7774 MappableExprsHandler::OMP_MAP_FROM; 7775 } 7776 7777 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7778 // Rotate by getFlagMemberOffset() bits. 7779 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7780 << getFlagMemberOffset()); 7781 } 7782 7783 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7784 OpenMPOffloadMappingFlags MemberOfFlag) { 7785 // If the entry is PTR_AND_OBJ but has not been marked with the special 7786 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7787 // marked as MEMBER_OF. 7788 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7789 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7790 return; 7791 7792 // Reset the placeholder value to prepare the flag for the assignment of the 7793 // proper MEMBER_OF value. 7794 Flags &= ~OMP_MAP_MEMBER_OF; 7795 Flags |= MemberOfFlag; 7796 } 7797 7798 void getPlainLayout(const CXXRecordDecl *RD, 7799 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7800 bool AsBase) const { 7801 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7802 7803 llvm::StructType *St = 7804 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7805 7806 unsigned NumElements = St->getNumElements(); 7807 llvm::SmallVector< 7808 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7809 RecordLayout(NumElements); 7810 7811 // Fill bases. 7812 for (const auto &I : RD->bases()) { 7813 if (I.isVirtual()) 7814 continue; 7815 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7816 // Ignore empty bases. 7817 if (Base->isEmpty() || CGF.getContext() 7818 .getASTRecordLayout(Base) 7819 .getNonVirtualSize() 7820 .isZero()) 7821 continue; 7822 7823 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7824 RecordLayout[FieldIndex] = Base; 7825 } 7826 // Fill in virtual bases. 7827 for (const auto &I : RD->vbases()) { 7828 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7829 // Ignore empty bases. 7830 if (Base->isEmpty()) 7831 continue; 7832 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7833 if (RecordLayout[FieldIndex]) 7834 continue; 7835 RecordLayout[FieldIndex] = Base; 7836 } 7837 // Fill in all the fields. 7838 assert(!RD->isUnion() && "Unexpected union."); 7839 for (const auto *Field : RD->fields()) { 7840 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7841 // will fill in later.) 7842 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7843 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7844 RecordLayout[FieldIndex] = Field; 7845 } 7846 } 7847 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7848 &Data : RecordLayout) { 7849 if (Data.isNull()) 7850 continue; 7851 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7852 getPlainLayout(Base, Layout, /*AsBase=*/true); 7853 else 7854 Layout.push_back(Data.get<const FieldDecl *>()); 7855 } 7856 } 7857 7858 public: 7859 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7860 : CurDir(&Dir), CGF(CGF) { 7861 // Extract firstprivate clause information. 7862 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7863 for (const auto *D : C->varlists()) 7864 FirstPrivateDecls.try_emplace( 7865 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7866 // Extract device pointer clause information. 7867 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7868 for (auto L : C->component_lists()) 7869 DevPointersMap[L.first].push_back(L.second); 7870 } 7871 7872 /// Constructor for the declare mapper directive. 7873 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7874 : CurDir(&Dir), CGF(CGF) {} 7875 7876 /// Generate code for the combined entry if we have a partially mapped struct 7877 /// and take care of the mapping flags of the arguments corresponding to 7878 /// individual struct members. 7879 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7880 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7881 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7882 const StructRangeInfoTy &PartialStruct) const { 7883 // Base is the base of the struct 7884 BasePointers.push_back(PartialStruct.Base.getPointer()); 7885 // Pointer is the address of the lowest element 7886 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7887 Pointers.push_back(LB); 7888 // Size is (addr of {highest+1} element) - (addr of lowest element) 7889 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7890 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7891 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7892 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7893 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7894 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7895 /*isSigned=*/false); 7896 Sizes.push_back(Size); 7897 // Map type is always TARGET_PARAM 7898 Types.push_back(OMP_MAP_TARGET_PARAM); 7899 // Remove TARGET_PARAM flag from the first element 7900 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7901 7902 // All other current entries will be MEMBER_OF the combined entry 7903 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7904 // 0xFFFF in the MEMBER_OF field). 7905 OpenMPOffloadMappingFlags MemberOfFlag = 7906 getMemberOfFlag(BasePointers.size() - 1); 7907 for (auto &M : CurTypes) 7908 setCorrectMemberOfFlag(M, MemberOfFlag); 7909 } 7910 7911 /// Generate all the base pointers, section pointers, sizes and map 7912 /// types for the extracted mappable expressions. Also, for each item that 7913 /// relates with a device pointer, a pair of the relevant declaration and 7914 /// index where it occurs is appended to the device pointers info array. 7915 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7916 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7917 MapFlagsArrayTy &Types) const { 7918 // We have to process the component lists that relate with the same 7919 // declaration in a single chunk so that we can generate the map flags 7920 // correctly. Therefore, we organize all lists in a map. 7921 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7922 7923 // Helper function to fill the information map for the different supported 7924 // clauses. 7925 auto &&InfoGen = [&Info]( 7926 const ValueDecl *D, 7927 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7928 OpenMPMapClauseKind MapType, 7929 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7930 bool ReturnDevicePointer, bool IsImplicit) { 7931 const ValueDecl *VD = 7932 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7933 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7934 IsImplicit); 7935 }; 7936 7937 assert(CurDir.is<const OMPExecutableDirective *>() && 7938 "Expect a executable directive"); 7939 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7940 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7941 for (const auto &L : C->component_lists()) { 7942 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7943 /*ReturnDevicePointer=*/false, C->isImplicit()); 7944 } 7945 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7946 for (const auto &L : C->component_lists()) { 7947 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7948 /*ReturnDevicePointer=*/false, C->isImplicit()); 7949 } 7950 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7951 for (const auto &L : C->component_lists()) { 7952 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7953 /*ReturnDevicePointer=*/false, C->isImplicit()); 7954 } 7955 7956 // Look at the use_device_ptr clause information and mark the existing map 7957 // entries as such. If there is no map information for an entry in the 7958 // use_device_ptr list, we create one with map type 'alloc' and zero size 7959 // section. It is the user fault if that was not mapped before. If there is 7960 // no map information and the pointer is a struct member, then we defer the 7961 // emission of that entry until the whole struct has been processed. 7962 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7963 DeferredInfo; 7964 7965 for (const auto *C : 7966 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 7967 for (const auto &L : C->component_lists()) { 7968 assert(!L.second.empty() && "Not expecting empty list of components!"); 7969 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7970 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7971 const Expr *IE = L.second.back().getAssociatedExpression(); 7972 // If the first component is a member expression, we have to look into 7973 // 'this', which maps to null in the map of map information. Otherwise 7974 // look directly for the information. 7975 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7976 7977 // We potentially have map information for this declaration already. 7978 // Look for the first set of components that refer to it. 7979 if (It != Info.end()) { 7980 auto CI = std::find_if( 7981 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7982 return MI.Components.back().getAssociatedDeclaration() == VD; 7983 }); 7984 // If we found a map entry, signal that the pointer has to be returned 7985 // and move on to the next declaration. 7986 if (CI != It->second.end()) { 7987 CI->ReturnDevicePointer = true; 7988 continue; 7989 } 7990 } 7991 7992 // We didn't find any match in our map information - generate a zero 7993 // size array section - if the pointer is a struct member we defer this 7994 // action until the whole struct has been processed. 7995 if (isa<MemberExpr>(IE)) { 7996 // Insert the pointer into Info to be processed by 7997 // generateInfoForComponentList. Because it is a member pointer 7998 // without a pointee, no entry will be generated for it, therefore 7999 // we need to generate one after the whole struct has been processed. 8000 // Nonetheless, generateInfoForComponentList must be called to take 8001 // the pointer into account for the calculation of the range of the 8002 // partial struct. 8003 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8004 /*ReturnDevicePointer=*/false, C->isImplicit()); 8005 DeferredInfo[nullptr].emplace_back(IE, VD); 8006 } else { 8007 llvm::Value *Ptr = 8008 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8009 BasePointers.emplace_back(Ptr, VD); 8010 Pointers.push_back(Ptr); 8011 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8012 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8013 } 8014 } 8015 } 8016 8017 for (const auto &M : Info) { 8018 // We need to know when we generate information for the first component 8019 // associated with a capture, because the mapping flags depend on it. 8020 bool IsFirstComponentList = true; 8021 8022 // Temporary versions of arrays 8023 MapBaseValuesArrayTy CurBasePointers; 8024 MapValuesArrayTy CurPointers; 8025 MapValuesArrayTy CurSizes; 8026 MapFlagsArrayTy CurTypes; 8027 StructRangeInfoTy PartialStruct; 8028 8029 for (const MapInfo &L : M.second) { 8030 assert(!L.Components.empty() && 8031 "Not expecting declaration with no component lists."); 8032 8033 // Remember the current base pointer index. 8034 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8035 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8036 CurBasePointers, CurPointers, CurSizes, 8037 CurTypes, PartialStruct, 8038 IsFirstComponentList, L.IsImplicit); 8039 8040 // If this entry relates with a device pointer, set the relevant 8041 // declaration and add the 'return pointer' flag. 8042 if (L.ReturnDevicePointer) { 8043 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8044 "Unexpected number of mapped base pointers."); 8045 8046 const ValueDecl *RelevantVD = 8047 L.Components.back().getAssociatedDeclaration(); 8048 assert(RelevantVD && 8049 "No relevant declaration related with device pointer??"); 8050 8051 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8052 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8053 } 8054 IsFirstComponentList = false; 8055 } 8056 8057 // Append any pending zero-length pointers which are struct members and 8058 // used with use_device_ptr. 8059 auto CI = DeferredInfo.find(M.first); 8060 if (CI != DeferredInfo.end()) { 8061 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8062 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8063 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8064 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8065 CurBasePointers.emplace_back(BasePtr, L.VD); 8066 CurPointers.push_back(Ptr); 8067 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8068 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8069 // value MEMBER_OF=FFFF so that the entry is later updated with the 8070 // correct value of MEMBER_OF. 8071 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8072 OMP_MAP_MEMBER_OF); 8073 } 8074 } 8075 8076 // If there is an entry in PartialStruct it means we have a struct with 8077 // individual members mapped. Emit an extra combined entry. 8078 if (PartialStruct.Base.isValid()) 8079 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8080 PartialStruct); 8081 8082 // We need to append the results of this capture to what we already have. 8083 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8084 Pointers.append(CurPointers.begin(), CurPointers.end()); 8085 Sizes.append(CurSizes.begin(), CurSizes.end()); 8086 Types.append(CurTypes.begin(), CurTypes.end()); 8087 } 8088 } 8089 8090 /// Generate all the base pointers, section pointers, sizes and map types for 8091 /// the extracted map clauses of user-defined mapper. 8092 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8093 MapValuesArrayTy &Pointers, 8094 MapValuesArrayTy &Sizes, 8095 MapFlagsArrayTy &Types) const { 8096 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8097 "Expect a declare mapper directive"); 8098 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8099 // We have to process the component lists that relate with the same 8100 // declaration in a single chunk so that we can generate the map flags 8101 // correctly. Therefore, we organize all lists in a map. 8102 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8103 8104 // Helper function to fill the information map for the different supported 8105 // clauses. 8106 auto &&InfoGen = [&Info]( 8107 const ValueDecl *D, 8108 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8109 OpenMPMapClauseKind MapType, 8110 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8111 bool ReturnDevicePointer, bool IsImplicit) { 8112 const ValueDecl *VD = 8113 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8114 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8115 IsImplicit); 8116 }; 8117 8118 for (const auto *C : CurMapperDir->clauselists()) { 8119 const auto *MC = cast<OMPMapClause>(C); 8120 for (const auto &L : MC->component_lists()) { 8121 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8122 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8123 } 8124 } 8125 8126 for (const auto &M : Info) { 8127 // We need to know when we generate information for the first component 8128 // associated with a capture, because the mapping flags depend on it. 8129 bool IsFirstComponentList = true; 8130 8131 // Temporary versions of arrays 8132 MapBaseValuesArrayTy CurBasePointers; 8133 MapValuesArrayTy CurPointers; 8134 MapValuesArrayTy CurSizes; 8135 MapFlagsArrayTy CurTypes; 8136 StructRangeInfoTy PartialStruct; 8137 8138 for (const MapInfo &L : M.second) { 8139 assert(!L.Components.empty() && 8140 "Not expecting declaration with no component lists."); 8141 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8142 CurBasePointers, CurPointers, CurSizes, 8143 CurTypes, PartialStruct, 8144 IsFirstComponentList, L.IsImplicit); 8145 IsFirstComponentList = false; 8146 } 8147 8148 // If there is an entry in PartialStruct it means we have a struct with 8149 // individual members mapped. Emit an extra combined entry. 8150 if (PartialStruct.Base.isValid()) 8151 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8152 PartialStruct); 8153 8154 // We need to append the results of this capture to what we already have. 8155 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8156 Pointers.append(CurPointers.begin(), CurPointers.end()); 8157 Sizes.append(CurSizes.begin(), CurSizes.end()); 8158 Types.append(CurTypes.begin(), CurTypes.end()); 8159 } 8160 } 8161 8162 /// Emit capture info for lambdas for variables captured by reference. 8163 void generateInfoForLambdaCaptures( 8164 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8165 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8166 MapFlagsArrayTy &Types, 8167 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8168 const auto *RD = VD->getType() 8169 .getCanonicalType() 8170 .getNonReferenceType() 8171 ->getAsCXXRecordDecl(); 8172 if (!RD || !RD->isLambda()) 8173 return; 8174 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8175 LValue VDLVal = CGF.MakeAddrLValue( 8176 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8177 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8178 FieldDecl *ThisCapture = nullptr; 8179 RD->getCaptureFields(Captures, ThisCapture); 8180 if (ThisCapture) { 8181 LValue ThisLVal = 8182 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8183 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8184 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 8185 BasePointers.push_back(ThisLVal.getPointer()); 8186 Pointers.push_back(ThisLValVal.getPointer()); 8187 Sizes.push_back( 8188 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8189 CGF.Int64Ty, /*isSigned=*/true)); 8190 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8191 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8192 } 8193 for (const LambdaCapture &LC : RD->captures()) { 8194 if (!LC.capturesVariable()) 8195 continue; 8196 const VarDecl *VD = LC.getCapturedVar(); 8197 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8198 continue; 8199 auto It = Captures.find(VD); 8200 assert(It != Captures.end() && "Found lambda capture without field."); 8201 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8202 if (LC.getCaptureKind() == LCK_ByRef) { 8203 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8204 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8205 BasePointers.push_back(VarLVal.getPointer()); 8206 Pointers.push_back(VarLValVal.getPointer()); 8207 Sizes.push_back(CGF.Builder.CreateIntCast( 8208 CGF.getTypeSize( 8209 VD->getType().getCanonicalType().getNonReferenceType()), 8210 CGF.Int64Ty, /*isSigned=*/true)); 8211 } else { 8212 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8213 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8214 BasePointers.push_back(VarLVal.getPointer()); 8215 Pointers.push_back(VarRVal.getScalarVal()); 8216 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8217 } 8218 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8219 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8220 } 8221 } 8222 8223 /// Set correct indices for lambdas captures. 8224 void adjustMemberOfForLambdaCaptures( 8225 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8226 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8227 MapFlagsArrayTy &Types) const { 8228 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8229 // Set correct member_of idx for all implicit lambda captures. 8230 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8231 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8232 continue; 8233 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8234 assert(BasePtr && "Unable to find base lambda address."); 8235 int TgtIdx = -1; 8236 for (unsigned J = I; J > 0; --J) { 8237 unsigned Idx = J - 1; 8238 if (Pointers[Idx] != BasePtr) 8239 continue; 8240 TgtIdx = Idx; 8241 break; 8242 } 8243 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8244 // All other current entries will be MEMBER_OF the combined entry 8245 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8246 // 0xFFFF in the MEMBER_OF field). 8247 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8248 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8249 } 8250 } 8251 8252 /// Generate the base pointers, section pointers, sizes and map types 8253 /// associated to a given capture. 8254 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8255 llvm::Value *Arg, 8256 MapBaseValuesArrayTy &BasePointers, 8257 MapValuesArrayTy &Pointers, 8258 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8259 StructRangeInfoTy &PartialStruct) const { 8260 assert(!Cap->capturesVariableArrayType() && 8261 "Not expecting to generate map info for a variable array type!"); 8262 8263 // We need to know when we generating information for the first component 8264 const ValueDecl *VD = Cap->capturesThis() 8265 ? nullptr 8266 : Cap->getCapturedVar()->getCanonicalDecl(); 8267 8268 // If this declaration appears in a is_device_ptr clause we just have to 8269 // pass the pointer by value. If it is a reference to a declaration, we just 8270 // pass its value. 8271 if (DevPointersMap.count(VD)) { 8272 BasePointers.emplace_back(Arg, VD); 8273 Pointers.push_back(Arg); 8274 Sizes.push_back( 8275 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8276 CGF.Int64Ty, /*isSigned=*/true)); 8277 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8278 return; 8279 } 8280 8281 using MapData = 8282 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8283 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8284 SmallVector<MapData, 4> DeclComponentLists; 8285 assert(CurDir.is<const OMPExecutableDirective *>() && 8286 "Expect a executable directive"); 8287 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8288 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8289 for (const auto &L : C->decl_component_lists(VD)) { 8290 assert(L.first == VD && 8291 "We got information for the wrong declaration??"); 8292 assert(!L.second.empty() && 8293 "Not expecting declaration with no component lists."); 8294 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8295 C->getMapTypeModifiers(), 8296 C->isImplicit()); 8297 } 8298 } 8299 8300 // Find overlapping elements (including the offset from the base element). 8301 llvm::SmallDenseMap< 8302 const MapData *, 8303 llvm::SmallVector< 8304 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8305 4> 8306 OverlappedData; 8307 size_t Count = 0; 8308 for (const MapData &L : DeclComponentLists) { 8309 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8310 OpenMPMapClauseKind MapType; 8311 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8312 bool IsImplicit; 8313 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8314 ++Count; 8315 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8316 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8317 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8318 auto CI = Components.rbegin(); 8319 auto CE = Components.rend(); 8320 auto SI = Components1.rbegin(); 8321 auto SE = Components1.rend(); 8322 for (; CI != CE && SI != SE; ++CI, ++SI) { 8323 if (CI->getAssociatedExpression()->getStmtClass() != 8324 SI->getAssociatedExpression()->getStmtClass()) 8325 break; 8326 // Are we dealing with different variables/fields? 8327 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8328 break; 8329 } 8330 // Found overlapping if, at least for one component, reached the head of 8331 // the components list. 8332 if (CI == CE || SI == SE) { 8333 assert((CI != CE || SI != SE) && 8334 "Unexpected full match of the mapping components."); 8335 const MapData &BaseData = CI == CE ? L : L1; 8336 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8337 SI == SE ? Components : Components1; 8338 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8339 OverlappedElements.getSecond().push_back(SubData); 8340 } 8341 } 8342 } 8343 // Sort the overlapped elements for each item. 8344 llvm::SmallVector<const FieldDecl *, 4> Layout; 8345 if (!OverlappedData.empty()) { 8346 if (const auto *CRD = 8347 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8348 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8349 else { 8350 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8351 Layout.append(RD->field_begin(), RD->field_end()); 8352 } 8353 } 8354 for (auto &Pair : OverlappedData) { 8355 llvm::sort( 8356 Pair.getSecond(), 8357 [&Layout]( 8358 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8359 OMPClauseMappableExprCommon::MappableExprComponentListRef 8360 Second) { 8361 auto CI = First.rbegin(); 8362 auto CE = First.rend(); 8363 auto SI = Second.rbegin(); 8364 auto SE = Second.rend(); 8365 for (; CI != CE && SI != SE; ++CI, ++SI) { 8366 if (CI->getAssociatedExpression()->getStmtClass() != 8367 SI->getAssociatedExpression()->getStmtClass()) 8368 break; 8369 // Are we dealing with different variables/fields? 8370 if (CI->getAssociatedDeclaration() != 8371 SI->getAssociatedDeclaration()) 8372 break; 8373 } 8374 8375 // Lists contain the same elements. 8376 if (CI == CE && SI == SE) 8377 return false; 8378 8379 // List with less elements is less than list with more elements. 8380 if (CI == CE || SI == SE) 8381 return CI == CE; 8382 8383 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8384 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8385 if (FD1->getParent() == FD2->getParent()) 8386 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8387 const auto It = 8388 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8389 return FD == FD1 || FD == FD2; 8390 }); 8391 return *It == FD1; 8392 }); 8393 } 8394 8395 // Associated with a capture, because the mapping flags depend on it. 8396 // Go through all of the elements with the overlapped elements. 8397 for (const auto &Pair : OverlappedData) { 8398 const MapData &L = *Pair.getFirst(); 8399 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8400 OpenMPMapClauseKind MapType; 8401 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8402 bool IsImplicit; 8403 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8404 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8405 OverlappedComponents = Pair.getSecond(); 8406 bool IsFirstComponentList = true; 8407 generateInfoForComponentList(MapType, MapModifiers, Components, 8408 BasePointers, Pointers, Sizes, Types, 8409 PartialStruct, IsFirstComponentList, 8410 IsImplicit, OverlappedComponents); 8411 } 8412 // Go through other elements without overlapped elements. 8413 bool IsFirstComponentList = OverlappedData.empty(); 8414 for (const MapData &L : DeclComponentLists) { 8415 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8416 OpenMPMapClauseKind MapType; 8417 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8418 bool IsImplicit; 8419 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8420 auto It = OverlappedData.find(&L); 8421 if (It == OverlappedData.end()) 8422 generateInfoForComponentList(MapType, MapModifiers, Components, 8423 BasePointers, Pointers, Sizes, Types, 8424 PartialStruct, IsFirstComponentList, 8425 IsImplicit); 8426 IsFirstComponentList = false; 8427 } 8428 } 8429 8430 /// Generate the base pointers, section pointers, sizes and map types 8431 /// associated with the declare target link variables. 8432 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8433 MapValuesArrayTy &Pointers, 8434 MapValuesArrayTy &Sizes, 8435 MapFlagsArrayTy &Types) const { 8436 assert(CurDir.is<const OMPExecutableDirective *>() && 8437 "Expect a executable directive"); 8438 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8439 // Map other list items in the map clause which are not captured variables 8440 // but "declare target link" global variables. 8441 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8442 for (const auto &L : C->component_lists()) { 8443 if (!L.first) 8444 continue; 8445 const auto *VD = dyn_cast<VarDecl>(L.first); 8446 if (!VD) 8447 continue; 8448 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8449 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8450 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8451 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8452 continue; 8453 StructRangeInfoTy PartialStruct; 8454 generateInfoForComponentList( 8455 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8456 Pointers, Sizes, Types, PartialStruct, 8457 /*IsFirstComponentList=*/true, C->isImplicit()); 8458 assert(!PartialStruct.Base.isValid() && 8459 "No partial structs for declare target link expected."); 8460 } 8461 } 8462 } 8463 8464 /// Generate the default map information for a given capture \a CI, 8465 /// record field declaration \a RI and captured value \a CV. 8466 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8467 const FieldDecl &RI, llvm::Value *CV, 8468 MapBaseValuesArrayTy &CurBasePointers, 8469 MapValuesArrayTy &CurPointers, 8470 MapValuesArrayTy &CurSizes, 8471 MapFlagsArrayTy &CurMapTypes) const { 8472 bool IsImplicit = true; 8473 // Do the default mapping. 8474 if (CI.capturesThis()) { 8475 CurBasePointers.push_back(CV); 8476 CurPointers.push_back(CV); 8477 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8478 CurSizes.push_back( 8479 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8480 CGF.Int64Ty, /*isSigned=*/true)); 8481 // Default map type. 8482 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8483 } else if (CI.capturesVariableByCopy()) { 8484 CurBasePointers.push_back(CV); 8485 CurPointers.push_back(CV); 8486 if (!RI.getType()->isAnyPointerType()) { 8487 // We have to signal to the runtime captures passed by value that are 8488 // not pointers. 8489 CurMapTypes.push_back(OMP_MAP_LITERAL); 8490 CurSizes.push_back(CGF.Builder.CreateIntCast( 8491 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8492 } else { 8493 // Pointers are implicitly mapped with a zero size and no flags 8494 // (other than first map that is added for all implicit maps). 8495 CurMapTypes.push_back(OMP_MAP_NONE); 8496 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8497 } 8498 const VarDecl *VD = CI.getCapturedVar(); 8499 auto I = FirstPrivateDecls.find(VD); 8500 if (I != FirstPrivateDecls.end()) 8501 IsImplicit = I->getSecond(); 8502 } else { 8503 assert(CI.capturesVariable() && "Expected captured reference."); 8504 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8505 QualType ElementType = PtrTy->getPointeeType(); 8506 CurSizes.push_back(CGF.Builder.CreateIntCast( 8507 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8508 // The default map type for a scalar/complex type is 'to' because by 8509 // default the value doesn't have to be retrieved. For an aggregate 8510 // type, the default is 'tofrom'. 8511 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8512 const VarDecl *VD = CI.getCapturedVar(); 8513 auto I = FirstPrivateDecls.find(VD); 8514 if (I != FirstPrivateDecls.end() && 8515 VD->getType().isConstant(CGF.getContext())) { 8516 llvm::Constant *Addr = 8517 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8518 // Copy the value of the original variable to the new global copy. 8519 CGF.Builder.CreateMemCpy( 8520 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8521 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8522 CurSizes.back(), /*IsVolatile=*/false); 8523 // Use new global variable as the base pointers. 8524 CurBasePointers.push_back(Addr); 8525 CurPointers.push_back(Addr); 8526 } else { 8527 CurBasePointers.push_back(CV); 8528 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8529 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8530 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8531 AlignmentSource::Decl)); 8532 CurPointers.push_back(PtrAddr.getPointer()); 8533 } else { 8534 CurPointers.push_back(CV); 8535 } 8536 } 8537 if (I != FirstPrivateDecls.end()) 8538 IsImplicit = I->getSecond(); 8539 } 8540 // Every default map produces a single argument which is a target parameter. 8541 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8542 8543 // Add flag stating this is an implicit map. 8544 if (IsImplicit) 8545 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8546 } 8547 }; 8548 } // anonymous namespace 8549 8550 /// Emit the arrays used to pass the captures and map information to the 8551 /// offloading runtime library. If there is no map or capture information, 8552 /// return nullptr by reference. 8553 static void 8554 emitOffloadingArrays(CodeGenFunction &CGF, 8555 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8556 MappableExprsHandler::MapValuesArrayTy &Pointers, 8557 MappableExprsHandler::MapValuesArrayTy &Sizes, 8558 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8559 CGOpenMPRuntime::TargetDataInfo &Info) { 8560 CodeGenModule &CGM = CGF.CGM; 8561 ASTContext &Ctx = CGF.getContext(); 8562 8563 // Reset the array information. 8564 Info.clearArrayInfo(); 8565 Info.NumberOfPtrs = BasePointers.size(); 8566 8567 if (Info.NumberOfPtrs) { 8568 // Detect if we have any capture size requiring runtime evaluation of the 8569 // size so that a constant array could be eventually used. 8570 bool hasRuntimeEvaluationCaptureSize = false; 8571 for (llvm::Value *S : Sizes) 8572 if (!isa<llvm::Constant>(S)) { 8573 hasRuntimeEvaluationCaptureSize = true; 8574 break; 8575 } 8576 8577 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8578 QualType PointerArrayType = Ctx.getConstantArrayType( 8579 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8580 /*IndexTypeQuals=*/0); 8581 8582 Info.BasePointersArray = 8583 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8584 Info.PointersArray = 8585 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8586 8587 // If we don't have any VLA types or other types that require runtime 8588 // evaluation, we can use a constant array for the map sizes, otherwise we 8589 // need to fill up the arrays as we do for the pointers. 8590 QualType Int64Ty = 8591 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8592 if (hasRuntimeEvaluationCaptureSize) { 8593 QualType SizeArrayType = Ctx.getConstantArrayType( 8594 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8595 /*IndexTypeQuals=*/0); 8596 Info.SizesArray = 8597 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8598 } else { 8599 // We expect all the sizes to be constant, so we collect them to create 8600 // a constant array. 8601 SmallVector<llvm::Constant *, 16> ConstSizes; 8602 for (llvm::Value *S : Sizes) 8603 ConstSizes.push_back(cast<llvm::Constant>(S)); 8604 8605 auto *SizesArrayInit = llvm::ConstantArray::get( 8606 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8607 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8608 auto *SizesArrayGbl = new llvm::GlobalVariable( 8609 CGM.getModule(), SizesArrayInit->getType(), 8610 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8611 SizesArrayInit, Name); 8612 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8613 Info.SizesArray = SizesArrayGbl; 8614 } 8615 8616 // The map types are always constant so we don't need to generate code to 8617 // fill arrays. Instead, we create an array constant. 8618 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8619 llvm::copy(MapTypes, Mapping.begin()); 8620 llvm::Constant *MapTypesArrayInit = 8621 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8622 std::string MaptypesName = 8623 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8624 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8625 CGM.getModule(), MapTypesArrayInit->getType(), 8626 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8627 MapTypesArrayInit, MaptypesName); 8628 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8629 Info.MapTypesArray = MapTypesArrayGbl; 8630 8631 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8632 llvm::Value *BPVal = *BasePointers[I]; 8633 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8634 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8635 Info.BasePointersArray, 0, I); 8636 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8637 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8638 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8639 CGF.Builder.CreateStore(BPVal, BPAddr); 8640 8641 if (Info.requiresDevicePointerInfo()) 8642 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8643 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8644 8645 llvm::Value *PVal = Pointers[I]; 8646 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8647 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8648 Info.PointersArray, 0, I); 8649 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8650 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8651 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8652 CGF.Builder.CreateStore(PVal, PAddr); 8653 8654 if (hasRuntimeEvaluationCaptureSize) { 8655 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8656 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8657 Info.SizesArray, 8658 /*Idx0=*/0, 8659 /*Idx1=*/I); 8660 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8661 CGF.Builder.CreateStore( 8662 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8663 SAddr); 8664 } 8665 } 8666 } 8667 } 8668 8669 /// Emit the arguments to be passed to the runtime library based on the 8670 /// arrays of pointers, sizes and map types. 8671 static void emitOffloadingArraysArgument( 8672 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8673 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8674 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8675 CodeGenModule &CGM = CGF.CGM; 8676 if (Info.NumberOfPtrs) { 8677 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8678 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8679 Info.BasePointersArray, 8680 /*Idx0=*/0, /*Idx1=*/0); 8681 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8682 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8683 Info.PointersArray, 8684 /*Idx0=*/0, 8685 /*Idx1=*/0); 8686 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8687 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8688 /*Idx0=*/0, /*Idx1=*/0); 8689 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8690 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8691 Info.MapTypesArray, 8692 /*Idx0=*/0, 8693 /*Idx1=*/0); 8694 } else { 8695 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8696 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8697 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8698 MapTypesArrayArg = 8699 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8700 } 8701 } 8702 8703 /// Check for inner distribute directive. 8704 static const OMPExecutableDirective * 8705 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8706 const auto *CS = D.getInnermostCapturedStmt(); 8707 const auto *Body = 8708 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8709 const Stmt *ChildStmt = 8710 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8711 8712 if (const auto *NestedDir = 8713 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8714 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8715 switch (D.getDirectiveKind()) { 8716 case OMPD_target: 8717 if (isOpenMPDistributeDirective(DKind)) 8718 return NestedDir; 8719 if (DKind == OMPD_teams) { 8720 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8721 /*IgnoreCaptured=*/true); 8722 if (!Body) 8723 return nullptr; 8724 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8725 if (const auto *NND = 8726 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8727 DKind = NND->getDirectiveKind(); 8728 if (isOpenMPDistributeDirective(DKind)) 8729 return NND; 8730 } 8731 } 8732 return nullptr; 8733 case OMPD_target_teams: 8734 if (isOpenMPDistributeDirective(DKind)) 8735 return NestedDir; 8736 return nullptr; 8737 case OMPD_target_parallel: 8738 case OMPD_target_simd: 8739 case OMPD_target_parallel_for: 8740 case OMPD_target_parallel_for_simd: 8741 return nullptr; 8742 case OMPD_target_teams_distribute: 8743 case OMPD_target_teams_distribute_simd: 8744 case OMPD_target_teams_distribute_parallel_for: 8745 case OMPD_target_teams_distribute_parallel_for_simd: 8746 case OMPD_parallel: 8747 case OMPD_for: 8748 case OMPD_parallel_for: 8749 case OMPD_parallel_sections: 8750 case OMPD_for_simd: 8751 case OMPD_parallel_for_simd: 8752 case OMPD_cancel: 8753 case OMPD_cancellation_point: 8754 case OMPD_ordered: 8755 case OMPD_threadprivate: 8756 case OMPD_allocate: 8757 case OMPD_task: 8758 case OMPD_simd: 8759 case OMPD_sections: 8760 case OMPD_section: 8761 case OMPD_single: 8762 case OMPD_master: 8763 case OMPD_critical: 8764 case OMPD_taskyield: 8765 case OMPD_barrier: 8766 case OMPD_taskwait: 8767 case OMPD_taskgroup: 8768 case OMPD_atomic: 8769 case OMPD_flush: 8770 case OMPD_teams: 8771 case OMPD_target_data: 8772 case OMPD_target_exit_data: 8773 case OMPD_target_enter_data: 8774 case OMPD_distribute: 8775 case OMPD_distribute_simd: 8776 case OMPD_distribute_parallel_for: 8777 case OMPD_distribute_parallel_for_simd: 8778 case OMPD_teams_distribute: 8779 case OMPD_teams_distribute_simd: 8780 case OMPD_teams_distribute_parallel_for: 8781 case OMPD_teams_distribute_parallel_for_simd: 8782 case OMPD_target_update: 8783 case OMPD_declare_simd: 8784 case OMPD_declare_variant: 8785 case OMPD_declare_target: 8786 case OMPD_end_declare_target: 8787 case OMPD_declare_reduction: 8788 case OMPD_declare_mapper: 8789 case OMPD_taskloop: 8790 case OMPD_taskloop_simd: 8791 case OMPD_master_taskloop: 8792 case OMPD_master_taskloop_simd: 8793 case OMPD_parallel_master_taskloop: 8794 case OMPD_requires: 8795 case OMPD_unknown: 8796 llvm_unreachable("Unexpected directive."); 8797 } 8798 } 8799 8800 return nullptr; 8801 } 8802 8803 /// Emit the user-defined mapper function. The code generation follows the 8804 /// pattern in the example below. 8805 /// \code 8806 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8807 /// void *base, void *begin, 8808 /// int64_t size, int64_t type) { 8809 /// // Allocate space for an array section first. 8810 /// if (size > 1 && !maptype.IsDelete) 8811 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8812 /// size*sizeof(Ty), clearToFrom(type)); 8813 /// // Map members. 8814 /// for (unsigned i = 0; i < size; i++) { 8815 /// // For each component specified by this mapper: 8816 /// for (auto c : all_components) { 8817 /// if (c.hasMapper()) 8818 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8819 /// c.arg_type); 8820 /// else 8821 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8822 /// c.arg_begin, c.arg_size, c.arg_type); 8823 /// } 8824 /// } 8825 /// // Delete the array section. 8826 /// if (size > 1 && maptype.IsDelete) 8827 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8828 /// size*sizeof(Ty), clearToFrom(type)); 8829 /// } 8830 /// \endcode 8831 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8832 CodeGenFunction *CGF) { 8833 if (UDMMap.count(D) > 0) 8834 return; 8835 ASTContext &C = CGM.getContext(); 8836 QualType Ty = D->getType(); 8837 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8838 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8839 auto *MapperVarDecl = 8840 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8841 SourceLocation Loc = D->getLocation(); 8842 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8843 8844 // Prepare mapper function arguments and attributes. 8845 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8846 C.VoidPtrTy, ImplicitParamDecl::Other); 8847 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8848 ImplicitParamDecl::Other); 8849 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8850 C.VoidPtrTy, ImplicitParamDecl::Other); 8851 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8852 ImplicitParamDecl::Other); 8853 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8854 ImplicitParamDecl::Other); 8855 FunctionArgList Args; 8856 Args.push_back(&HandleArg); 8857 Args.push_back(&BaseArg); 8858 Args.push_back(&BeginArg); 8859 Args.push_back(&SizeArg); 8860 Args.push_back(&TypeArg); 8861 const CGFunctionInfo &FnInfo = 8862 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8863 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8864 SmallString<64> TyStr; 8865 llvm::raw_svector_ostream Out(TyStr); 8866 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8867 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8868 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8869 Name, &CGM.getModule()); 8870 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8871 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8872 // Start the mapper function code generation. 8873 CodeGenFunction MapperCGF(CGM); 8874 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8875 // Compute the starting and end addreses of array elements. 8876 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8877 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8878 C.getPointerType(Int64Ty), Loc); 8879 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8880 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8881 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8882 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8883 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8884 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8885 C.getPointerType(Int64Ty), Loc); 8886 // Prepare common arguments for array initiation and deletion. 8887 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8888 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8889 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8890 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8891 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8892 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8893 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8894 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8895 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8896 8897 // Emit array initiation if this is an array section and \p MapType indicates 8898 // that memory allocation is required. 8899 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8900 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8901 ElementSize, HeadBB, /*IsInit=*/true); 8902 8903 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8904 8905 // Emit the loop header block. 8906 MapperCGF.EmitBlock(HeadBB); 8907 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8908 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8909 // Evaluate whether the initial condition is satisfied. 8910 llvm::Value *IsEmpty = 8911 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8912 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8913 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8914 8915 // Emit the loop body block. 8916 MapperCGF.EmitBlock(BodyBB); 8917 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8918 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8919 PtrPHI->addIncoming(PtrBegin, EntryBB); 8920 Address PtrCurrent = 8921 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8922 .getAlignment() 8923 .alignmentOfArrayElement(ElementSize)); 8924 // Privatize the declared variable of mapper to be the current array element. 8925 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8926 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8927 return MapperCGF 8928 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8929 .getAddress(); 8930 }); 8931 (void)Scope.Privatize(); 8932 8933 // Get map clause information. Fill up the arrays with all mapped variables. 8934 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8935 MappableExprsHandler::MapValuesArrayTy Pointers; 8936 MappableExprsHandler::MapValuesArrayTy Sizes; 8937 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8938 MappableExprsHandler MEHandler(*D, MapperCGF); 8939 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8940 8941 // Call the runtime API __tgt_mapper_num_components to get the number of 8942 // pre-existing components. 8943 llvm::Value *OffloadingArgs[] = {Handle}; 8944 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8945 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8946 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8947 PreviousSize, 8948 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8949 8950 // Fill up the runtime mapper handle for all components. 8951 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8952 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8953 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8954 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8955 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8956 llvm::Value *CurSizeArg = Sizes[I]; 8957 8958 // Extract the MEMBER_OF field from the map type. 8959 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8960 MapperCGF.EmitBlock(MemberBB); 8961 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8962 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8963 OriMapType, 8964 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8965 llvm::BasicBlock *MemberCombineBB = 8966 MapperCGF.createBasicBlock("omp.member.combine"); 8967 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8968 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8969 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8970 // Add the number of pre-existing components to the MEMBER_OF field if it 8971 // is valid. 8972 MapperCGF.EmitBlock(MemberCombineBB); 8973 llvm::Value *CombinedMember = 8974 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8975 // Do nothing if it is not a member of previous components. 8976 MapperCGF.EmitBlock(TypeBB); 8977 llvm::PHINode *MemberMapType = 8978 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 8979 MemberMapType->addIncoming(OriMapType, MemberBB); 8980 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 8981 8982 // Combine the map type inherited from user-defined mapper with that 8983 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 8984 // bits of the \a MapType, which is the input argument of the mapper 8985 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 8986 // bits of MemberMapType. 8987 // [OpenMP 5.0], 1.2.6. map-type decay. 8988 // | alloc | to | from | tofrom | release | delete 8989 // ---------------------------------------------------------- 8990 // alloc | alloc | alloc | alloc | alloc | release | delete 8991 // to | alloc | to | alloc | to | release | delete 8992 // from | alloc | alloc | from | from | release | delete 8993 // tofrom | alloc | to | from | tofrom | release | delete 8994 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 8995 MapType, 8996 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 8997 MappableExprsHandler::OMP_MAP_FROM)); 8998 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 8999 llvm::BasicBlock *AllocElseBB = 9000 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9001 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9002 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9003 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9004 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9005 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9006 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9007 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9008 MapperCGF.EmitBlock(AllocBB); 9009 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9010 MemberMapType, 9011 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9012 MappableExprsHandler::OMP_MAP_FROM))); 9013 MapperCGF.Builder.CreateBr(EndBB); 9014 MapperCGF.EmitBlock(AllocElseBB); 9015 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9016 LeftToFrom, 9017 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9018 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9019 // In case of to, clear OMP_MAP_FROM. 9020 MapperCGF.EmitBlock(ToBB); 9021 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9022 MemberMapType, 9023 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9024 MapperCGF.Builder.CreateBr(EndBB); 9025 MapperCGF.EmitBlock(ToElseBB); 9026 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9027 LeftToFrom, 9028 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9029 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9030 // In case of from, clear OMP_MAP_TO. 9031 MapperCGF.EmitBlock(FromBB); 9032 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9033 MemberMapType, 9034 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9035 // In case of tofrom, do nothing. 9036 MapperCGF.EmitBlock(EndBB); 9037 llvm::PHINode *CurMapType = 9038 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9039 CurMapType->addIncoming(AllocMapType, AllocBB); 9040 CurMapType->addIncoming(ToMapType, ToBB); 9041 CurMapType->addIncoming(FromMapType, FromBB); 9042 CurMapType->addIncoming(MemberMapType, ToElseBB); 9043 9044 // TODO: call the corresponding mapper function if a user-defined mapper is 9045 // associated with this map clause. 9046 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9047 // data structure. 9048 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9049 CurSizeArg, CurMapType}; 9050 MapperCGF.EmitRuntimeCall( 9051 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9052 OffloadingArgs); 9053 } 9054 9055 // Update the pointer to point to the next element that needs to be mapped, 9056 // and check whether we have mapped all elements. 9057 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9058 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9059 PtrPHI->addIncoming(PtrNext, BodyBB); 9060 llvm::Value *IsDone = 9061 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9062 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9063 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9064 9065 MapperCGF.EmitBlock(ExitBB); 9066 // Emit array deletion if this is an array section and \p MapType indicates 9067 // that deletion is required. 9068 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9069 ElementSize, DoneBB, /*IsInit=*/false); 9070 9071 // Emit the function exit block. 9072 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9073 MapperCGF.FinishFunction(); 9074 UDMMap.try_emplace(D, Fn); 9075 if (CGF) { 9076 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9077 Decls.second.push_back(D); 9078 } 9079 } 9080 9081 /// Emit the array initialization or deletion portion for user-defined mapper 9082 /// code generation. First, it evaluates whether an array section is mapped and 9083 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9084 /// true, and \a MapType indicates to not delete this array, array 9085 /// initialization code is generated. If \a IsInit is false, and \a MapType 9086 /// indicates to not this array, array deletion code is generated. 9087 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9088 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9089 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9090 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9091 StringRef Prefix = IsInit ? ".init" : ".del"; 9092 9093 // Evaluate if this is an array section. 9094 llvm::BasicBlock *IsDeleteBB = 9095 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9096 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9097 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9098 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9099 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9100 9101 // Evaluate if we are going to delete this section. 9102 MapperCGF.EmitBlock(IsDeleteBB); 9103 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9104 MapType, 9105 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9106 llvm::Value *DeleteCond; 9107 if (IsInit) { 9108 DeleteCond = MapperCGF.Builder.CreateIsNull( 9109 DeleteBit, "omp.array" + Prefix + ".delete"); 9110 } else { 9111 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9112 DeleteBit, "omp.array" + Prefix + ".delete"); 9113 } 9114 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9115 9116 MapperCGF.EmitBlock(BodyBB); 9117 // Get the array size by multiplying element size and element number (i.e., \p 9118 // Size). 9119 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9120 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9121 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9122 // memory allocation/deletion purpose only. 9123 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9124 MapType, 9125 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9126 MappableExprsHandler::OMP_MAP_FROM))); 9127 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9128 // data structure. 9129 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9130 MapperCGF.EmitRuntimeCall( 9131 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9132 } 9133 9134 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9135 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9136 llvm::Value *DeviceID, 9137 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9138 const OMPLoopDirective &D)> 9139 SizeEmitter) { 9140 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9141 const OMPExecutableDirective *TD = &D; 9142 // Get nested teams distribute kind directive, if any. 9143 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9144 TD = getNestedDistributeDirective(CGM.getContext(), D); 9145 if (!TD) 9146 return; 9147 const auto *LD = cast<OMPLoopDirective>(TD); 9148 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9149 PrePostActionTy &) { 9150 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9151 llvm::Value *Args[] = {DeviceID, NumIterations}; 9152 CGF.EmitRuntimeCall( 9153 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9154 } 9155 }; 9156 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9157 } 9158 9159 void CGOpenMPRuntime::emitTargetCall( 9160 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9161 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9162 const Expr *Device, 9163 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9164 const OMPLoopDirective &D)> 9165 SizeEmitter) { 9166 if (!CGF.HaveInsertPoint()) 9167 return; 9168 9169 assert(OutlinedFn && "Invalid outlined function!"); 9170 9171 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9172 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9173 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9174 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9175 PrePostActionTy &) { 9176 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9177 }; 9178 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9179 9180 CodeGenFunction::OMPTargetDataInfo InputInfo; 9181 llvm::Value *MapTypesArray = nullptr; 9182 // Fill up the pointer arrays and transfer execution to the device. 9183 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9184 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9185 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9186 // On top of the arrays that were filled up, the target offloading call 9187 // takes as arguments the device id as well as the host pointer. The host 9188 // pointer is used by the runtime library to identify the current target 9189 // region, so it only has to be unique and not necessarily point to 9190 // anything. It could be the pointer to the outlined function that 9191 // implements the target region, but we aren't using that so that the 9192 // compiler doesn't need to keep that, and could therefore inline the host 9193 // function if proven worthwhile during optimization. 9194 9195 // From this point on, we need to have an ID of the target region defined. 9196 assert(OutlinedFnID && "Invalid outlined function ID!"); 9197 9198 // Emit device ID if any. 9199 llvm::Value *DeviceID; 9200 if (Device) { 9201 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9202 CGF.Int64Ty, /*isSigned=*/true); 9203 } else { 9204 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9205 } 9206 9207 // Emit the number of elements in the offloading arrays. 9208 llvm::Value *PointerNum = 9209 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9210 9211 // Return value of the runtime offloading call. 9212 llvm::Value *Return; 9213 9214 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9215 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9216 9217 // Emit tripcount for the target loop-based directive. 9218 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9219 9220 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9221 // The target region is an outlined function launched by the runtime 9222 // via calls __tgt_target() or __tgt_target_teams(). 9223 // 9224 // __tgt_target() launches a target region with one team and one thread, 9225 // executing a serial region. This master thread may in turn launch 9226 // more threads within its team upon encountering a parallel region, 9227 // however, no additional teams can be launched on the device. 9228 // 9229 // __tgt_target_teams() launches a target region with one or more teams, 9230 // each with one or more threads. This call is required for target 9231 // constructs such as: 9232 // 'target teams' 9233 // 'target' / 'teams' 9234 // 'target teams distribute parallel for' 9235 // 'target parallel' 9236 // and so on. 9237 // 9238 // Note that on the host and CPU targets, the runtime implementation of 9239 // these calls simply call the outlined function without forking threads. 9240 // The outlined functions themselves have runtime calls to 9241 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9242 // the compiler in emitTeamsCall() and emitParallelCall(). 9243 // 9244 // In contrast, on the NVPTX target, the implementation of 9245 // __tgt_target_teams() launches a GPU kernel with the requested number 9246 // of teams and threads so no additional calls to the runtime are required. 9247 if (NumTeams) { 9248 // If we have NumTeams defined this means that we have an enclosed teams 9249 // region. Therefore we also expect to have NumThreads defined. These two 9250 // values should be defined in the presence of a teams directive, 9251 // regardless of having any clauses associated. If the user is using teams 9252 // but no clauses, these two values will be the default that should be 9253 // passed to the runtime library - a 32-bit integer with the value zero. 9254 assert(NumThreads && "Thread limit expression should be available along " 9255 "with number of teams."); 9256 llvm::Value *OffloadingArgs[] = {DeviceID, 9257 OutlinedFnID, 9258 PointerNum, 9259 InputInfo.BasePointersArray.getPointer(), 9260 InputInfo.PointersArray.getPointer(), 9261 InputInfo.SizesArray.getPointer(), 9262 MapTypesArray, 9263 NumTeams, 9264 NumThreads}; 9265 Return = CGF.EmitRuntimeCall( 9266 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9267 : OMPRTL__tgt_target_teams), 9268 OffloadingArgs); 9269 } else { 9270 llvm::Value *OffloadingArgs[] = {DeviceID, 9271 OutlinedFnID, 9272 PointerNum, 9273 InputInfo.BasePointersArray.getPointer(), 9274 InputInfo.PointersArray.getPointer(), 9275 InputInfo.SizesArray.getPointer(), 9276 MapTypesArray}; 9277 Return = CGF.EmitRuntimeCall( 9278 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 9279 : OMPRTL__tgt_target), 9280 OffloadingArgs); 9281 } 9282 9283 // Check the error code and execute the host version if required. 9284 llvm::BasicBlock *OffloadFailedBlock = 9285 CGF.createBasicBlock("omp_offload.failed"); 9286 llvm::BasicBlock *OffloadContBlock = 9287 CGF.createBasicBlock("omp_offload.cont"); 9288 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9289 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9290 9291 CGF.EmitBlock(OffloadFailedBlock); 9292 if (RequiresOuterTask) { 9293 CapturedVars.clear(); 9294 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9295 } 9296 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9297 CGF.EmitBranch(OffloadContBlock); 9298 9299 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9300 }; 9301 9302 // Notify that the host version must be executed. 9303 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9304 RequiresOuterTask](CodeGenFunction &CGF, 9305 PrePostActionTy &) { 9306 if (RequiresOuterTask) { 9307 CapturedVars.clear(); 9308 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9309 } 9310 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9311 }; 9312 9313 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9314 &CapturedVars, RequiresOuterTask, 9315 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9316 // Fill up the arrays with all the captured variables. 9317 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9318 MappableExprsHandler::MapValuesArrayTy Pointers; 9319 MappableExprsHandler::MapValuesArrayTy Sizes; 9320 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9321 9322 // Get mappable expression information. 9323 MappableExprsHandler MEHandler(D, CGF); 9324 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9325 9326 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9327 auto CV = CapturedVars.begin(); 9328 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9329 CE = CS.capture_end(); 9330 CI != CE; ++CI, ++RI, ++CV) { 9331 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9332 MappableExprsHandler::MapValuesArrayTy CurPointers; 9333 MappableExprsHandler::MapValuesArrayTy CurSizes; 9334 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9335 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9336 9337 // VLA sizes are passed to the outlined region by copy and do not have map 9338 // information associated. 9339 if (CI->capturesVariableArrayType()) { 9340 CurBasePointers.push_back(*CV); 9341 CurPointers.push_back(*CV); 9342 CurSizes.push_back(CGF.Builder.CreateIntCast( 9343 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9344 // Copy to the device as an argument. No need to retrieve it. 9345 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9346 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9347 MappableExprsHandler::OMP_MAP_IMPLICIT); 9348 } else { 9349 // If we have any information in the map clause, we use it, otherwise we 9350 // just do a default mapping. 9351 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9352 CurSizes, CurMapTypes, PartialStruct); 9353 if (CurBasePointers.empty()) 9354 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9355 CurPointers, CurSizes, CurMapTypes); 9356 // Generate correct mapping for variables captured by reference in 9357 // lambdas. 9358 if (CI->capturesVariable()) 9359 MEHandler.generateInfoForLambdaCaptures( 9360 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9361 CurMapTypes, LambdaPointers); 9362 } 9363 // We expect to have at least an element of information for this capture. 9364 assert(!CurBasePointers.empty() && 9365 "Non-existing map pointer for capture!"); 9366 assert(CurBasePointers.size() == CurPointers.size() && 9367 CurBasePointers.size() == CurSizes.size() && 9368 CurBasePointers.size() == CurMapTypes.size() && 9369 "Inconsistent map information sizes!"); 9370 9371 // If there is an entry in PartialStruct it means we have a struct with 9372 // individual members mapped. Emit an extra combined entry. 9373 if (PartialStruct.Base.isValid()) 9374 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9375 CurMapTypes, PartialStruct); 9376 9377 // We need to append the results of this capture to what we already have. 9378 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9379 Pointers.append(CurPointers.begin(), CurPointers.end()); 9380 Sizes.append(CurSizes.begin(), CurSizes.end()); 9381 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9382 } 9383 // Adjust MEMBER_OF flags for the lambdas captures. 9384 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9385 Pointers, MapTypes); 9386 // Map other list items in the map clause which are not captured variables 9387 // but "declare target link" global variables. 9388 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9389 MapTypes); 9390 9391 TargetDataInfo Info; 9392 // Fill up the arrays and create the arguments. 9393 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9394 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9395 Info.PointersArray, Info.SizesArray, 9396 Info.MapTypesArray, Info); 9397 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9398 InputInfo.BasePointersArray = 9399 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9400 InputInfo.PointersArray = 9401 Address(Info.PointersArray, CGM.getPointerAlign()); 9402 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9403 MapTypesArray = Info.MapTypesArray; 9404 if (RequiresOuterTask) 9405 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9406 else 9407 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9408 }; 9409 9410 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9411 CodeGenFunction &CGF, PrePostActionTy &) { 9412 if (RequiresOuterTask) { 9413 CodeGenFunction::OMPTargetDataInfo InputInfo; 9414 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9415 } else { 9416 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9417 } 9418 }; 9419 9420 // If we have a target function ID it means that we need to support 9421 // offloading, otherwise, just execute on the host. We need to execute on host 9422 // regardless of the conditional in the if clause if, e.g., the user do not 9423 // specify target triples. 9424 if (OutlinedFnID) { 9425 if (IfCond) { 9426 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9427 } else { 9428 RegionCodeGenTy ThenRCG(TargetThenGen); 9429 ThenRCG(CGF); 9430 } 9431 } else { 9432 RegionCodeGenTy ElseRCG(TargetElseGen); 9433 ElseRCG(CGF); 9434 } 9435 } 9436 9437 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9438 StringRef ParentName) { 9439 if (!S) 9440 return; 9441 9442 // Codegen OMP target directives that offload compute to the device. 9443 bool RequiresDeviceCodegen = 9444 isa<OMPExecutableDirective>(S) && 9445 isOpenMPTargetExecutionDirective( 9446 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9447 9448 if (RequiresDeviceCodegen) { 9449 const auto &E = *cast<OMPExecutableDirective>(S); 9450 unsigned DeviceID; 9451 unsigned FileID; 9452 unsigned Line; 9453 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9454 FileID, Line); 9455 9456 // Is this a target region that should not be emitted as an entry point? If 9457 // so just signal we are done with this target region. 9458 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9459 ParentName, Line)) 9460 return; 9461 9462 switch (E.getDirectiveKind()) { 9463 case OMPD_target: 9464 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9465 cast<OMPTargetDirective>(E)); 9466 break; 9467 case OMPD_target_parallel: 9468 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9469 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9470 break; 9471 case OMPD_target_teams: 9472 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9473 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9474 break; 9475 case OMPD_target_teams_distribute: 9476 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9477 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9478 break; 9479 case OMPD_target_teams_distribute_simd: 9480 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9481 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9482 break; 9483 case OMPD_target_parallel_for: 9484 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9485 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9486 break; 9487 case OMPD_target_parallel_for_simd: 9488 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9489 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9490 break; 9491 case OMPD_target_simd: 9492 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9493 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9494 break; 9495 case OMPD_target_teams_distribute_parallel_for: 9496 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9497 CGM, ParentName, 9498 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9499 break; 9500 case OMPD_target_teams_distribute_parallel_for_simd: 9501 CodeGenFunction:: 9502 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9503 CGM, ParentName, 9504 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9505 break; 9506 case OMPD_parallel: 9507 case OMPD_for: 9508 case OMPD_parallel_for: 9509 case OMPD_parallel_sections: 9510 case OMPD_for_simd: 9511 case OMPD_parallel_for_simd: 9512 case OMPD_cancel: 9513 case OMPD_cancellation_point: 9514 case OMPD_ordered: 9515 case OMPD_threadprivate: 9516 case OMPD_allocate: 9517 case OMPD_task: 9518 case OMPD_simd: 9519 case OMPD_sections: 9520 case OMPD_section: 9521 case OMPD_single: 9522 case OMPD_master: 9523 case OMPD_critical: 9524 case OMPD_taskyield: 9525 case OMPD_barrier: 9526 case OMPD_taskwait: 9527 case OMPD_taskgroup: 9528 case OMPD_atomic: 9529 case OMPD_flush: 9530 case OMPD_teams: 9531 case OMPD_target_data: 9532 case OMPD_target_exit_data: 9533 case OMPD_target_enter_data: 9534 case OMPD_distribute: 9535 case OMPD_distribute_simd: 9536 case OMPD_distribute_parallel_for: 9537 case OMPD_distribute_parallel_for_simd: 9538 case OMPD_teams_distribute: 9539 case OMPD_teams_distribute_simd: 9540 case OMPD_teams_distribute_parallel_for: 9541 case OMPD_teams_distribute_parallel_for_simd: 9542 case OMPD_target_update: 9543 case OMPD_declare_simd: 9544 case OMPD_declare_variant: 9545 case OMPD_declare_target: 9546 case OMPD_end_declare_target: 9547 case OMPD_declare_reduction: 9548 case OMPD_declare_mapper: 9549 case OMPD_taskloop: 9550 case OMPD_taskloop_simd: 9551 case OMPD_master_taskloop: 9552 case OMPD_master_taskloop_simd: 9553 case OMPD_parallel_master_taskloop: 9554 case OMPD_requires: 9555 case OMPD_unknown: 9556 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9557 } 9558 return; 9559 } 9560 9561 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9562 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9563 return; 9564 9565 scanForTargetRegionsFunctions( 9566 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9567 return; 9568 } 9569 9570 // If this is a lambda function, look into its body. 9571 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9572 S = L->getBody(); 9573 9574 // Keep looking for target regions recursively. 9575 for (const Stmt *II : S->children()) 9576 scanForTargetRegionsFunctions(II, ParentName); 9577 } 9578 9579 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9580 // If emitting code for the host, we do not process FD here. Instead we do 9581 // the normal code generation. 9582 if (!CGM.getLangOpts().OpenMPIsDevice) { 9583 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9584 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9585 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9586 // Do not emit device_type(nohost) functions for the host. 9587 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9588 return true; 9589 } 9590 return false; 9591 } 9592 9593 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9594 StringRef Name = CGM.getMangledName(GD); 9595 // Try to detect target regions in the function. 9596 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9597 scanForTargetRegionsFunctions(FD->getBody(), Name); 9598 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9599 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9600 // Do not emit device_type(nohost) functions for the host. 9601 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9602 return true; 9603 } 9604 9605 // Do not to emit function if it is not marked as declare target. 9606 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9607 AlreadyEmittedTargetFunctions.count(Name) == 0; 9608 } 9609 9610 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9611 if (!CGM.getLangOpts().OpenMPIsDevice) 9612 return false; 9613 9614 // Check if there are Ctors/Dtors in this declaration and look for target 9615 // regions in it. We use the complete variant to produce the kernel name 9616 // mangling. 9617 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9618 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9619 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9620 StringRef ParentName = 9621 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9622 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9623 } 9624 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9625 StringRef ParentName = 9626 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9627 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9628 } 9629 } 9630 9631 // Do not to emit variable if it is not marked as declare target. 9632 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9633 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9634 cast<VarDecl>(GD.getDecl())); 9635 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9636 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9637 HasRequiresUnifiedSharedMemory)) { 9638 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9639 return true; 9640 } 9641 return false; 9642 } 9643 9644 llvm::Constant * 9645 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9646 const VarDecl *VD) { 9647 assert(VD->getType().isConstant(CGM.getContext()) && 9648 "Expected constant variable."); 9649 StringRef VarName; 9650 llvm::Constant *Addr; 9651 llvm::GlobalValue::LinkageTypes Linkage; 9652 QualType Ty = VD->getType(); 9653 SmallString<128> Buffer; 9654 { 9655 unsigned DeviceID; 9656 unsigned FileID; 9657 unsigned Line; 9658 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9659 FileID, Line); 9660 llvm::raw_svector_ostream OS(Buffer); 9661 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9662 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9663 VarName = OS.str(); 9664 } 9665 Linkage = llvm::GlobalValue::InternalLinkage; 9666 Addr = 9667 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9668 getDefaultFirstprivateAddressSpace()); 9669 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9670 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9671 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9672 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9673 VarName, Addr, VarSize, 9674 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9675 return Addr; 9676 } 9677 9678 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9679 llvm::Constant *Addr) { 9680 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9681 !CGM.getLangOpts().OpenMPIsDevice) 9682 return; 9683 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9684 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9685 if (!Res) { 9686 if (CGM.getLangOpts().OpenMPIsDevice) { 9687 // Register non-target variables being emitted in device code (debug info 9688 // may cause this). 9689 StringRef VarName = CGM.getMangledName(VD); 9690 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9691 } 9692 return; 9693 } 9694 // Register declare target variables. 9695 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9696 StringRef VarName; 9697 CharUnits VarSize; 9698 llvm::GlobalValue::LinkageTypes Linkage; 9699 9700 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9701 !HasRequiresUnifiedSharedMemory) { 9702 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9703 VarName = CGM.getMangledName(VD); 9704 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9705 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9706 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9707 } else { 9708 VarSize = CharUnits::Zero(); 9709 } 9710 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9711 // Temp solution to prevent optimizations of the internal variables. 9712 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9713 std::string RefName = getName({VarName, "ref"}); 9714 if (!CGM.GetGlobalValue(RefName)) { 9715 llvm::Constant *AddrRef = 9716 getOrCreateInternalVariable(Addr->getType(), RefName); 9717 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9718 GVAddrRef->setConstant(/*Val=*/true); 9719 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9720 GVAddrRef->setInitializer(Addr); 9721 CGM.addCompilerUsedGlobal(GVAddrRef); 9722 } 9723 } 9724 } else { 9725 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9726 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9727 HasRequiresUnifiedSharedMemory)) && 9728 "Declare target attribute must link or to with unified memory."); 9729 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9730 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9731 else 9732 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9733 9734 if (CGM.getLangOpts().OpenMPIsDevice) { 9735 VarName = Addr->getName(); 9736 Addr = nullptr; 9737 } else { 9738 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9739 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9740 } 9741 VarSize = CGM.getPointerSize(); 9742 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9743 } 9744 9745 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9746 VarName, Addr, VarSize, Flags, Linkage); 9747 } 9748 9749 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9750 if (isa<FunctionDecl>(GD.getDecl()) || 9751 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9752 return emitTargetFunctions(GD); 9753 9754 return emitTargetGlobalVariable(GD); 9755 } 9756 9757 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9758 for (const VarDecl *VD : DeferredGlobalVariables) { 9759 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9760 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9761 if (!Res) 9762 continue; 9763 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9764 !HasRequiresUnifiedSharedMemory) { 9765 CGM.EmitGlobal(VD); 9766 } else { 9767 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9768 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9769 HasRequiresUnifiedSharedMemory)) && 9770 "Expected link clause or to clause with unified memory."); 9771 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9772 } 9773 } 9774 } 9775 9776 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9777 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9778 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9779 " Expected target-based directive."); 9780 } 9781 9782 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9783 const OMPRequiresDecl *D) { 9784 for (const OMPClause *Clause : D->clauselists()) { 9785 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9786 HasRequiresUnifiedSharedMemory = true; 9787 break; 9788 } 9789 } 9790 } 9791 9792 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9793 LangAS &AS) { 9794 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9795 return false; 9796 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9797 switch(A->getAllocatorType()) { 9798 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9799 // Not supported, fallback to the default mem space. 9800 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9801 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9802 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9803 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9804 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9805 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9806 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9807 AS = LangAS::Default; 9808 return true; 9809 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9810 llvm_unreachable("Expected predefined allocator for the variables with the " 9811 "static storage."); 9812 } 9813 return false; 9814 } 9815 9816 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9817 return HasRequiresUnifiedSharedMemory; 9818 } 9819 9820 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9821 CodeGenModule &CGM) 9822 : CGM(CGM) { 9823 if (CGM.getLangOpts().OpenMPIsDevice) { 9824 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9825 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9826 } 9827 } 9828 9829 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9830 if (CGM.getLangOpts().OpenMPIsDevice) 9831 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9832 } 9833 9834 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9835 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9836 return true; 9837 9838 StringRef Name = CGM.getMangledName(GD); 9839 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9840 // Do not to emit function if it is marked as declare target as it was already 9841 // emitted. 9842 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9843 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9844 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9845 return !F->isDeclaration(); 9846 return false; 9847 } 9848 return true; 9849 } 9850 9851 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9852 } 9853 9854 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9855 // If we don't have entries or if we are emitting code for the device, we 9856 // don't need to do anything. 9857 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9858 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9859 (OffloadEntriesInfoManager.empty() && 9860 !HasEmittedDeclareTargetRegion && 9861 !HasEmittedTargetRegion)) 9862 return nullptr; 9863 9864 // Create and register the function that handles the requires directives. 9865 ASTContext &C = CGM.getContext(); 9866 9867 llvm::Function *RequiresRegFn; 9868 { 9869 CodeGenFunction CGF(CGM); 9870 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9871 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9872 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9873 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9874 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9875 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9876 // TODO: check for other requires clauses. 9877 // The requires directive takes effect only when a target region is 9878 // present in the compilation unit. Otherwise it is ignored and not 9879 // passed to the runtime. This avoids the runtime from throwing an error 9880 // for mismatching requires clauses across compilation units that don't 9881 // contain at least 1 target region. 9882 assert((HasEmittedTargetRegion || 9883 HasEmittedDeclareTargetRegion || 9884 !OffloadEntriesInfoManager.empty()) && 9885 "Target or declare target region expected."); 9886 if (HasRequiresUnifiedSharedMemory) 9887 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9888 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9889 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9890 CGF.FinishFunction(); 9891 } 9892 return RequiresRegFn; 9893 } 9894 9895 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9896 const OMPExecutableDirective &D, 9897 SourceLocation Loc, 9898 llvm::Function *OutlinedFn, 9899 ArrayRef<llvm::Value *> CapturedVars) { 9900 if (!CGF.HaveInsertPoint()) 9901 return; 9902 9903 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9904 CodeGenFunction::RunCleanupsScope Scope(CGF); 9905 9906 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9907 llvm::Value *Args[] = { 9908 RTLoc, 9909 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9910 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9911 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9912 RealArgs.append(std::begin(Args), std::end(Args)); 9913 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9914 9915 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9916 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9917 } 9918 9919 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9920 const Expr *NumTeams, 9921 const Expr *ThreadLimit, 9922 SourceLocation Loc) { 9923 if (!CGF.HaveInsertPoint()) 9924 return; 9925 9926 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9927 9928 llvm::Value *NumTeamsVal = 9929 NumTeams 9930 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9931 CGF.CGM.Int32Ty, /* isSigned = */ true) 9932 : CGF.Builder.getInt32(0); 9933 9934 llvm::Value *ThreadLimitVal = 9935 ThreadLimit 9936 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9937 CGF.CGM.Int32Ty, /* isSigned = */ true) 9938 : CGF.Builder.getInt32(0); 9939 9940 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9941 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9942 ThreadLimitVal}; 9943 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9944 PushNumTeamsArgs); 9945 } 9946 9947 void CGOpenMPRuntime::emitTargetDataCalls( 9948 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9949 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9950 if (!CGF.HaveInsertPoint()) 9951 return; 9952 9953 // Action used to replace the default codegen action and turn privatization 9954 // off. 9955 PrePostActionTy NoPrivAction; 9956 9957 // Generate the code for the opening of the data environment. Capture all the 9958 // arguments of the runtime call by reference because they are used in the 9959 // closing of the region. 9960 auto &&BeginThenGen = [this, &D, Device, &Info, 9961 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9962 // Fill up the arrays with all the mapped variables. 9963 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9964 MappableExprsHandler::MapValuesArrayTy Pointers; 9965 MappableExprsHandler::MapValuesArrayTy Sizes; 9966 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9967 9968 // Get map clause information. 9969 MappableExprsHandler MCHandler(D, CGF); 9970 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9971 9972 // Fill up the arrays and create the arguments. 9973 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9974 9975 llvm::Value *BasePointersArrayArg = nullptr; 9976 llvm::Value *PointersArrayArg = nullptr; 9977 llvm::Value *SizesArrayArg = nullptr; 9978 llvm::Value *MapTypesArrayArg = nullptr; 9979 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9980 SizesArrayArg, MapTypesArrayArg, Info); 9981 9982 // Emit device ID if any. 9983 llvm::Value *DeviceID = nullptr; 9984 if (Device) { 9985 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9986 CGF.Int64Ty, /*isSigned=*/true); 9987 } else { 9988 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9989 } 9990 9991 // Emit the number of elements in the offloading arrays. 9992 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9993 9994 llvm::Value *OffloadingArgs[] = { 9995 DeviceID, PointerNum, BasePointersArrayArg, 9996 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9997 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9998 OffloadingArgs); 9999 10000 // If device pointer privatization is required, emit the body of the region 10001 // here. It will have to be duplicated: with and without privatization. 10002 if (!Info.CaptureDeviceAddrMap.empty()) 10003 CodeGen(CGF); 10004 }; 10005 10006 // Generate code for the closing of the data region. 10007 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10008 PrePostActionTy &) { 10009 assert(Info.isValid() && "Invalid data environment closing arguments."); 10010 10011 llvm::Value *BasePointersArrayArg = nullptr; 10012 llvm::Value *PointersArrayArg = nullptr; 10013 llvm::Value *SizesArrayArg = nullptr; 10014 llvm::Value *MapTypesArrayArg = nullptr; 10015 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10016 SizesArrayArg, MapTypesArrayArg, Info); 10017 10018 // Emit device ID if any. 10019 llvm::Value *DeviceID = nullptr; 10020 if (Device) { 10021 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10022 CGF.Int64Ty, /*isSigned=*/true); 10023 } else { 10024 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10025 } 10026 10027 // Emit the number of elements in the offloading arrays. 10028 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10029 10030 llvm::Value *OffloadingArgs[] = { 10031 DeviceID, PointerNum, BasePointersArrayArg, 10032 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10033 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10034 OffloadingArgs); 10035 }; 10036 10037 // If we need device pointer privatization, we need to emit the body of the 10038 // region with no privatization in the 'else' branch of the conditional. 10039 // Otherwise, we don't have to do anything. 10040 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10041 PrePostActionTy &) { 10042 if (!Info.CaptureDeviceAddrMap.empty()) { 10043 CodeGen.setAction(NoPrivAction); 10044 CodeGen(CGF); 10045 } 10046 }; 10047 10048 // We don't have to do anything to close the region if the if clause evaluates 10049 // to false. 10050 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10051 10052 if (IfCond) { 10053 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10054 } else { 10055 RegionCodeGenTy RCG(BeginThenGen); 10056 RCG(CGF); 10057 } 10058 10059 // If we don't require privatization of device pointers, we emit the body in 10060 // between the runtime calls. This avoids duplicating the body code. 10061 if (Info.CaptureDeviceAddrMap.empty()) { 10062 CodeGen.setAction(NoPrivAction); 10063 CodeGen(CGF); 10064 } 10065 10066 if (IfCond) { 10067 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10068 } else { 10069 RegionCodeGenTy RCG(EndThenGen); 10070 RCG(CGF); 10071 } 10072 } 10073 10074 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10075 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10076 const Expr *Device) { 10077 if (!CGF.HaveInsertPoint()) 10078 return; 10079 10080 assert((isa<OMPTargetEnterDataDirective>(D) || 10081 isa<OMPTargetExitDataDirective>(D) || 10082 isa<OMPTargetUpdateDirective>(D)) && 10083 "Expecting either target enter, exit data, or update directives."); 10084 10085 CodeGenFunction::OMPTargetDataInfo InputInfo; 10086 llvm::Value *MapTypesArray = nullptr; 10087 // Generate the code for the opening of the data environment. 10088 auto &&ThenGen = [this, &D, Device, &InputInfo, 10089 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10090 // Emit device ID if any. 10091 llvm::Value *DeviceID = nullptr; 10092 if (Device) { 10093 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10094 CGF.Int64Ty, /*isSigned=*/true); 10095 } else { 10096 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10097 } 10098 10099 // Emit the number of elements in the offloading arrays. 10100 llvm::Constant *PointerNum = 10101 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10102 10103 llvm::Value *OffloadingArgs[] = {DeviceID, 10104 PointerNum, 10105 InputInfo.BasePointersArray.getPointer(), 10106 InputInfo.PointersArray.getPointer(), 10107 InputInfo.SizesArray.getPointer(), 10108 MapTypesArray}; 10109 10110 // Select the right runtime function call for each expected standalone 10111 // directive. 10112 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10113 OpenMPRTLFunction RTLFn; 10114 switch (D.getDirectiveKind()) { 10115 case OMPD_target_enter_data: 10116 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 10117 : OMPRTL__tgt_target_data_begin; 10118 break; 10119 case OMPD_target_exit_data: 10120 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10121 : OMPRTL__tgt_target_data_end; 10122 break; 10123 case OMPD_target_update: 10124 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10125 : OMPRTL__tgt_target_data_update; 10126 break; 10127 case OMPD_parallel: 10128 case OMPD_for: 10129 case OMPD_parallel_for: 10130 case OMPD_parallel_sections: 10131 case OMPD_for_simd: 10132 case OMPD_parallel_for_simd: 10133 case OMPD_cancel: 10134 case OMPD_cancellation_point: 10135 case OMPD_ordered: 10136 case OMPD_threadprivate: 10137 case OMPD_allocate: 10138 case OMPD_task: 10139 case OMPD_simd: 10140 case OMPD_sections: 10141 case OMPD_section: 10142 case OMPD_single: 10143 case OMPD_master: 10144 case OMPD_critical: 10145 case OMPD_taskyield: 10146 case OMPD_barrier: 10147 case OMPD_taskwait: 10148 case OMPD_taskgroup: 10149 case OMPD_atomic: 10150 case OMPD_flush: 10151 case OMPD_teams: 10152 case OMPD_target_data: 10153 case OMPD_distribute: 10154 case OMPD_distribute_simd: 10155 case OMPD_distribute_parallel_for: 10156 case OMPD_distribute_parallel_for_simd: 10157 case OMPD_teams_distribute: 10158 case OMPD_teams_distribute_simd: 10159 case OMPD_teams_distribute_parallel_for: 10160 case OMPD_teams_distribute_parallel_for_simd: 10161 case OMPD_declare_simd: 10162 case OMPD_declare_variant: 10163 case OMPD_declare_target: 10164 case OMPD_end_declare_target: 10165 case OMPD_declare_reduction: 10166 case OMPD_declare_mapper: 10167 case OMPD_taskloop: 10168 case OMPD_taskloop_simd: 10169 case OMPD_master_taskloop: 10170 case OMPD_master_taskloop_simd: 10171 case OMPD_parallel_master_taskloop: 10172 case OMPD_target: 10173 case OMPD_target_simd: 10174 case OMPD_target_teams_distribute: 10175 case OMPD_target_teams_distribute_simd: 10176 case OMPD_target_teams_distribute_parallel_for: 10177 case OMPD_target_teams_distribute_parallel_for_simd: 10178 case OMPD_target_teams: 10179 case OMPD_target_parallel: 10180 case OMPD_target_parallel_for: 10181 case OMPD_target_parallel_for_simd: 10182 case OMPD_requires: 10183 case OMPD_unknown: 10184 llvm_unreachable("Unexpected standalone target data directive."); 10185 break; 10186 } 10187 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10188 }; 10189 10190 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10191 CodeGenFunction &CGF, PrePostActionTy &) { 10192 // Fill up the arrays with all the mapped variables. 10193 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10194 MappableExprsHandler::MapValuesArrayTy Pointers; 10195 MappableExprsHandler::MapValuesArrayTy Sizes; 10196 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10197 10198 // Get map clause information. 10199 MappableExprsHandler MEHandler(D, CGF); 10200 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10201 10202 TargetDataInfo Info; 10203 // Fill up the arrays and create the arguments. 10204 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10205 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10206 Info.PointersArray, Info.SizesArray, 10207 Info.MapTypesArray, Info); 10208 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10209 InputInfo.BasePointersArray = 10210 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10211 InputInfo.PointersArray = 10212 Address(Info.PointersArray, CGM.getPointerAlign()); 10213 InputInfo.SizesArray = 10214 Address(Info.SizesArray, CGM.getPointerAlign()); 10215 MapTypesArray = Info.MapTypesArray; 10216 if (D.hasClausesOfKind<OMPDependClause>()) 10217 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10218 else 10219 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10220 }; 10221 10222 if (IfCond) { 10223 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10224 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10225 } else { 10226 RegionCodeGenTy ThenRCG(TargetThenGen); 10227 ThenRCG(CGF); 10228 } 10229 } 10230 10231 namespace { 10232 /// Kind of parameter in a function with 'declare simd' directive. 10233 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10234 /// Attribute set of the parameter. 10235 struct ParamAttrTy { 10236 ParamKindTy Kind = Vector; 10237 llvm::APSInt StrideOrArg; 10238 llvm::APSInt Alignment; 10239 }; 10240 } // namespace 10241 10242 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10243 ArrayRef<ParamAttrTy> ParamAttrs) { 10244 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10245 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10246 // of that clause. The VLEN value must be power of 2. 10247 // In other case the notion of the function`s "characteristic data type" (CDT) 10248 // is used to compute the vector length. 10249 // CDT is defined in the following order: 10250 // a) For non-void function, the CDT is the return type. 10251 // b) If the function has any non-uniform, non-linear parameters, then the 10252 // CDT is the type of the first such parameter. 10253 // c) If the CDT determined by a) or b) above is struct, union, or class 10254 // type which is pass-by-value (except for the type that maps to the 10255 // built-in complex data type), the characteristic data type is int. 10256 // d) If none of the above three cases is applicable, the CDT is int. 10257 // The VLEN is then determined based on the CDT and the size of vector 10258 // register of that ISA for which current vector version is generated. The 10259 // VLEN is computed using the formula below: 10260 // VLEN = sizeof(vector_register) / sizeof(CDT), 10261 // where vector register size specified in section 3.2.1 Registers and the 10262 // Stack Frame of original AMD64 ABI document. 10263 QualType RetType = FD->getReturnType(); 10264 if (RetType.isNull()) 10265 return 0; 10266 ASTContext &C = FD->getASTContext(); 10267 QualType CDT; 10268 if (!RetType.isNull() && !RetType->isVoidType()) { 10269 CDT = RetType; 10270 } else { 10271 unsigned Offset = 0; 10272 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10273 if (ParamAttrs[Offset].Kind == Vector) 10274 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10275 ++Offset; 10276 } 10277 if (CDT.isNull()) { 10278 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10279 if (ParamAttrs[I + Offset].Kind == Vector) { 10280 CDT = FD->getParamDecl(I)->getType(); 10281 break; 10282 } 10283 } 10284 } 10285 } 10286 if (CDT.isNull()) 10287 CDT = C.IntTy; 10288 CDT = CDT->getCanonicalTypeUnqualified(); 10289 if (CDT->isRecordType() || CDT->isUnionType()) 10290 CDT = C.IntTy; 10291 return C.getTypeSize(CDT); 10292 } 10293 10294 static void 10295 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10296 const llvm::APSInt &VLENVal, 10297 ArrayRef<ParamAttrTy> ParamAttrs, 10298 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10299 struct ISADataTy { 10300 char ISA; 10301 unsigned VecRegSize; 10302 }; 10303 ISADataTy ISAData[] = { 10304 { 10305 'b', 128 10306 }, // SSE 10307 { 10308 'c', 256 10309 }, // AVX 10310 { 10311 'd', 256 10312 }, // AVX2 10313 { 10314 'e', 512 10315 }, // AVX512 10316 }; 10317 llvm::SmallVector<char, 2> Masked; 10318 switch (State) { 10319 case OMPDeclareSimdDeclAttr::BS_Undefined: 10320 Masked.push_back('N'); 10321 Masked.push_back('M'); 10322 break; 10323 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10324 Masked.push_back('N'); 10325 break; 10326 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10327 Masked.push_back('M'); 10328 break; 10329 } 10330 for (char Mask : Masked) { 10331 for (const ISADataTy &Data : ISAData) { 10332 SmallString<256> Buffer; 10333 llvm::raw_svector_ostream Out(Buffer); 10334 Out << "_ZGV" << Data.ISA << Mask; 10335 if (!VLENVal) { 10336 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10337 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10338 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10339 } else { 10340 Out << VLENVal; 10341 } 10342 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10343 switch (ParamAttr.Kind){ 10344 case LinearWithVarStride: 10345 Out << 's' << ParamAttr.StrideOrArg; 10346 break; 10347 case Linear: 10348 Out << 'l'; 10349 if (!!ParamAttr.StrideOrArg) 10350 Out << ParamAttr.StrideOrArg; 10351 break; 10352 case Uniform: 10353 Out << 'u'; 10354 break; 10355 case Vector: 10356 Out << 'v'; 10357 break; 10358 } 10359 if (!!ParamAttr.Alignment) 10360 Out << 'a' << ParamAttr.Alignment; 10361 } 10362 Out << '_' << Fn->getName(); 10363 Fn->addFnAttr(Out.str()); 10364 } 10365 } 10366 } 10367 10368 // This are the Functions that are needed to mangle the name of the 10369 // vector functions generated by the compiler, according to the rules 10370 // defined in the "Vector Function ABI specifications for AArch64", 10371 // available at 10372 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10373 10374 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10375 /// 10376 /// TODO: Need to implement the behavior for reference marked with a 10377 /// var or no linear modifiers (1.b in the section). For this, we 10378 /// need to extend ParamKindTy to support the linear modifiers. 10379 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10380 QT = QT.getCanonicalType(); 10381 10382 if (QT->isVoidType()) 10383 return false; 10384 10385 if (Kind == ParamKindTy::Uniform) 10386 return false; 10387 10388 if (Kind == ParamKindTy::Linear) 10389 return false; 10390 10391 // TODO: Handle linear references with modifiers 10392 10393 if (Kind == ParamKindTy::LinearWithVarStride) 10394 return false; 10395 10396 return true; 10397 } 10398 10399 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10400 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10401 QT = QT.getCanonicalType(); 10402 unsigned Size = C.getTypeSize(QT); 10403 10404 // Only scalars and complex within 16 bytes wide set PVB to true. 10405 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10406 return false; 10407 10408 if (QT->isFloatingType()) 10409 return true; 10410 10411 if (QT->isIntegerType()) 10412 return true; 10413 10414 if (QT->isPointerType()) 10415 return true; 10416 10417 // TODO: Add support for complex types (section 3.1.2, item 2). 10418 10419 return false; 10420 } 10421 10422 /// Computes the lane size (LS) of a return type or of an input parameter, 10423 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10424 /// TODO: Add support for references, section 3.2.1, item 1. 10425 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10426 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10427 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10428 if (getAArch64PBV(PTy, C)) 10429 return C.getTypeSize(PTy); 10430 } 10431 if (getAArch64PBV(QT, C)) 10432 return C.getTypeSize(QT); 10433 10434 return C.getTypeSize(C.getUIntPtrType()); 10435 } 10436 10437 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10438 // signature of the scalar function, as defined in 3.2.2 of the 10439 // AAVFABI. 10440 static std::tuple<unsigned, unsigned, bool> 10441 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10442 QualType RetType = FD->getReturnType().getCanonicalType(); 10443 10444 ASTContext &C = FD->getASTContext(); 10445 10446 bool OutputBecomesInput = false; 10447 10448 llvm::SmallVector<unsigned, 8> Sizes; 10449 if (!RetType->isVoidType()) { 10450 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10451 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10452 OutputBecomesInput = true; 10453 } 10454 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10455 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10456 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10457 } 10458 10459 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10460 // The LS of a function parameter / return value can only be a power 10461 // of 2, starting from 8 bits, up to 128. 10462 assert(std::all_of(Sizes.begin(), Sizes.end(), 10463 [](unsigned Size) { 10464 return Size == 8 || Size == 16 || Size == 32 || 10465 Size == 64 || Size == 128; 10466 }) && 10467 "Invalid size"); 10468 10469 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10470 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10471 OutputBecomesInput); 10472 } 10473 10474 /// Mangle the parameter part of the vector function name according to 10475 /// their OpenMP classification. The mangling function is defined in 10476 /// section 3.5 of the AAVFABI. 10477 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10478 SmallString<256> Buffer; 10479 llvm::raw_svector_ostream Out(Buffer); 10480 for (const auto &ParamAttr : ParamAttrs) { 10481 switch (ParamAttr.Kind) { 10482 case LinearWithVarStride: 10483 Out << "ls" << ParamAttr.StrideOrArg; 10484 break; 10485 case Linear: 10486 Out << 'l'; 10487 // Don't print the step value if it is not present or if it is 10488 // equal to 1. 10489 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10490 Out << ParamAttr.StrideOrArg; 10491 break; 10492 case Uniform: 10493 Out << 'u'; 10494 break; 10495 case Vector: 10496 Out << 'v'; 10497 break; 10498 } 10499 10500 if (!!ParamAttr.Alignment) 10501 Out << 'a' << ParamAttr.Alignment; 10502 } 10503 10504 return Out.str(); 10505 } 10506 10507 // Function used to add the attribute. The parameter `VLEN` is 10508 // templated to allow the use of "x" when targeting scalable functions 10509 // for SVE. 10510 template <typename T> 10511 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10512 char ISA, StringRef ParSeq, 10513 StringRef MangledName, bool OutputBecomesInput, 10514 llvm::Function *Fn) { 10515 SmallString<256> Buffer; 10516 llvm::raw_svector_ostream Out(Buffer); 10517 Out << Prefix << ISA << LMask << VLEN; 10518 if (OutputBecomesInput) 10519 Out << "v"; 10520 Out << ParSeq << "_" << MangledName; 10521 Fn->addFnAttr(Out.str()); 10522 } 10523 10524 // Helper function to generate the Advanced SIMD names depending on 10525 // the value of the NDS when simdlen is not present. 10526 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10527 StringRef Prefix, char ISA, 10528 StringRef ParSeq, StringRef MangledName, 10529 bool OutputBecomesInput, 10530 llvm::Function *Fn) { 10531 switch (NDS) { 10532 case 8: 10533 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10534 OutputBecomesInput, Fn); 10535 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10536 OutputBecomesInput, Fn); 10537 break; 10538 case 16: 10539 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10540 OutputBecomesInput, Fn); 10541 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10542 OutputBecomesInput, Fn); 10543 break; 10544 case 32: 10545 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10546 OutputBecomesInput, Fn); 10547 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10548 OutputBecomesInput, Fn); 10549 break; 10550 case 64: 10551 case 128: 10552 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10553 OutputBecomesInput, Fn); 10554 break; 10555 default: 10556 llvm_unreachable("Scalar type is too wide."); 10557 } 10558 } 10559 10560 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 10561 static void emitAArch64DeclareSimdFunction( 10562 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10563 ArrayRef<ParamAttrTy> ParamAttrs, 10564 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10565 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10566 10567 // Get basic data for building the vector signature. 10568 const auto Data = getNDSWDS(FD, ParamAttrs); 10569 const unsigned NDS = std::get<0>(Data); 10570 const unsigned WDS = std::get<1>(Data); 10571 const bool OutputBecomesInput = std::get<2>(Data); 10572 10573 // Check the values provided via `simdlen` by the user. 10574 // 1. A `simdlen(1)` doesn't produce vector signatures, 10575 if (UserVLEN == 1) { 10576 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10577 DiagnosticsEngine::Warning, 10578 "The clause simdlen(1) has no effect when targeting aarch64."); 10579 CGM.getDiags().Report(SLoc, DiagID); 10580 return; 10581 } 10582 10583 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10584 // Advanced SIMD output. 10585 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10586 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10587 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10588 "power of 2 when targeting Advanced SIMD."); 10589 CGM.getDiags().Report(SLoc, DiagID); 10590 return; 10591 } 10592 10593 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10594 // limits. 10595 if (ISA == 's' && UserVLEN != 0) { 10596 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10597 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10598 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10599 "lanes in the architectural constraints " 10600 "for SVE (min is 128-bit, max is " 10601 "2048-bit, by steps of 128-bit)"); 10602 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10603 return; 10604 } 10605 } 10606 10607 // Sort out parameter sequence. 10608 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10609 StringRef Prefix = "_ZGV"; 10610 // Generate simdlen from user input (if any). 10611 if (UserVLEN) { 10612 if (ISA == 's') { 10613 // SVE generates only a masked function. 10614 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10615 OutputBecomesInput, Fn); 10616 } else { 10617 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10618 // Advanced SIMD generates one or two functions, depending on 10619 // the `[not]inbranch` clause. 10620 switch (State) { 10621 case OMPDeclareSimdDeclAttr::BS_Undefined: 10622 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10623 OutputBecomesInput, Fn); 10624 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10625 OutputBecomesInput, Fn); 10626 break; 10627 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10628 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10629 OutputBecomesInput, Fn); 10630 break; 10631 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10632 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10633 OutputBecomesInput, Fn); 10634 break; 10635 } 10636 } 10637 } else { 10638 // If no user simdlen is provided, follow the AAVFABI rules for 10639 // generating the vector length. 10640 if (ISA == 's') { 10641 // SVE, section 3.4.1, item 1. 10642 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10643 OutputBecomesInput, Fn); 10644 } else { 10645 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10646 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10647 // two vector names depending on the use of the clause 10648 // `[not]inbranch`. 10649 switch (State) { 10650 case OMPDeclareSimdDeclAttr::BS_Undefined: 10651 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10652 OutputBecomesInput, Fn); 10653 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10654 OutputBecomesInput, Fn); 10655 break; 10656 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10657 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10658 OutputBecomesInput, Fn); 10659 break; 10660 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10661 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10662 OutputBecomesInput, Fn); 10663 break; 10664 } 10665 } 10666 } 10667 } 10668 10669 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10670 llvm::Function *Fn) { 10671 ASTContext &C = CGM.getContext(); 10672 FD = FD->getMostRecentDecl(); 10673 // Map params to their positions in function decl. 10674 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10675 if (isa<CXXMethodDecl>(FD)) 10676 ParamPositions.try_emplace(FD, 0); 10677 unsigned ParamPos = ParamPositions.size(); 10678 for (const ParmVarDecl *P : FD->parameters()) { 10679 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10680 ++ParamPos; 10681 } 10682 while (FD) { 10683 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10684 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10685 // Mark uniform parameters. 10686 for (const Expr *E : Attr->uniforms()) { 10687 E = E->IgnoreParenImpCasts(); 10688 unsigned Pos; 10689 if (isa<CXXThisExpr>(E)) { 10690 Pos = ParamPositions[FD]; 10691 } else { 10692 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10693 ->getCanonicalDecl(); 10694 Pos = ParamPositions[PVD]; 10695 } 10696 ParamAttrs[Pos].Kind = Uniform; 10697 } 10698 // Get alignment info. 10699 auto NI = Attr->alignments_begin(); 10700 for (const Expr *E : Attr->aligneds()) { 10701 E = E->IgnoreParenImpCasts(); 10702 unsigned Pos; 10703 QualType ParmTy; 10704 if (isa<CXXThisExpr>(E)) { 10705 Pos = ParamPositions[FD]; 10706 ParmTy = E->getType(); 10707 } else { 10708 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10709 ->getCanonicalDecl(); 10710 Pos = ParamPositions[PVD]; 10711 ParmTy = PVD->getType(); 10712 } 10713 ParamAttrs[Pos].Alignment = 10714 (*NI) 10715 ? (*NI)->EvaluateKnownConstInt(C) 10716 : llvm::APSInt::getUnsigned( 10717 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10718 .getQuantity()); 10719 ++NI; 10720 } 10721 // Mark linear parameters. 10722 auto SI = Attr->steps_begin(); 10723 auto MI = Attr->modifiers_begin(); 10724 for (const Expr *E : Attr->linears()) { 10725 E = E->IgnoreParenImpCasts(); 10726 unsigned Pos; 10727 if (isa<CXXThisExpr>(E)) { 10728 Pos = ParamPositions[FD]; 10729 } else { 10730 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10731 ->getCanonicalDecl(); 10732 Pos = ParamPositions[PVD]; 10733 } 10734 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10735 ParamAttr.Kind = Linear; 10736 if (*SI) { 10737 Expr::EvalResult Result; 10738 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10739 if (const auto *DRE = 10740 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10741 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10742 ParamAttr.Kind = LinearWithVarStride; 10743 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10744 ParamPositions[StridePVD->getCanonicalDecl()]); 10745 } 10746 } 10747 } else { 10748 ParamAttr.StrideOrArg = Result.Val.getInt(); 10749 } 10750 } 10751 ++SI; 10752 ++MI; 10753 } 10754 llvm::APSInt VLENVal; 10755 SourceLocation ExprLoc; 10756 const Expr *VLENExpr = Attr->getSimdlen(); 10757 if (VLENExpr) { 10758 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10759 ExprLoc = VLENExpr->getExprLoc(); 10760 } 10761 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10762 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10763 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10764 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10765 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10766 unsigned VLEN = VLENVal.getExtValue(); 10767 StringRef MangledName = Fn->getName(); 10768 if (CGM.getTarget().hasFeature("sve")) 10769 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10770 MangledName, 's', 128, Fn, ExprLoc); 10771 if (CGM.getTarget().hasFeature("neon")) 10772 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10773 MangledName, 'n', 128, Fn, ExprLoc); 10774 } 10775 } 10776 FD = FD->getPreviousDecl(); 10777 } 10778 } 10779 10780 namespace { 10781 /// Cleanup action for doacross support. 10782 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10783 public: 10784 static const int DoacrossFinArgs = 2; 10785 10786 private: 10787 llvm::FunctionCallee RTLFn; 10788 llvm::Value *Args[DoacrossFinArgs]; 10789 10790 public: 10791 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10792 ArrayRef<llvm::Value *> CallArgs) 10793 : RTLFn(RTLFn) { 10794 assert(CallArgs.size() == DoacrossFinArgs); 10795 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10796 } 10797 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10798 if (!CGF.HaveInsertPoint()) 10799 return; 10800 CGF.EmitRuntimeCall(RTLFn, Args); 10801 } 10802 }; 10803 } // namespace 10804 10805 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10806 const OMPLoopDirective &D, 10807 ArrayRef<Expr *> NumIterations) { 10808 if (!CGF.HaveInsertPoint()) 10809 return; 10810 10811 ASTContext &C = CGM.getContext(); 10812 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10813 RecordDecl *RD; 10814 if (KmpDimTy.isNull()) { 10815 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10816 // kmp_int64 lo; // lower 10817 // kmp_int64 up; // upper 10818 // kmp_int64 st; // stride 10819 // }; 10820 RD = C.buildImplicitRecord("kmp_dim"); 10821 RD->startDefinition(); 10822 addFieldToRecordDecl(C, RD, Int64Ty); 10823 addFieldToRecordDecl(C, RD, Int64Ty); 10824 addFieldToRecordDecl(C, RD, Int64Ty); 10825 RD->completeDefinition(); 10826 KmpDimTy = C.getRecordType(RD); 10827 } else { 10828 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10829 } 10830 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10831 QualType ArrayTy = 10832 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10833 10834 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10835 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10836 enum { LowerFD = 0, UpperFD, StrideFD }; 10837 // Fill dims with data. 10838 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10839 LValue DimsLVal = CGF.MakeAddrLValue( 10840 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10841 // dims.upper = num_iterations; 10842 LValue UpperLVal = CGF.EmitLValueForField( 10843 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10844 llvm::Value *NumIterVal = 10845 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10846 D.getNumIterations()->getType(), Int64Ty, 10847 D.getNumIterations()->getExprLoc()); 10848 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10849 // dims.stride = 1; 10850 LValue StrideLVal = CGF.EmitLValueForField( 10851 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10852 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10853 StrideLVal); 10854 } 10855 10856 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10857 // kmp_int32 num_dims, struct kmp_dim * dims); 10858 llvm::Value *Args[] = { 10859 emitUpdateLocation(CGF, D.getBeginLoc()), 10860 getThreadID(CGF, D.getBeginLoc()), 10861 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10862 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10863 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10864 CGM.VoidPtrTy)}; 10865 10866 llvm::FunctionCallee RTLFn = 10867 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10868 CGF.EmitRuntimeCall(RTLFn, Args); 10869 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10870 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10871 llvm::FunctionCallee FiniRTLFn = 10872 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10873 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10874 llvm::makeArrayRef(FiniArgs)); 10875 } 10876 10877 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10878 const OMPDependClause *C) { 10879 QualType Int64Ty = 10880 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10881 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10882 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10883 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10884 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10885 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10886 const Expr *CounterVal = C->getLoopData(I); 10887 assert(CounterVal); 10888 llvm::Value *CntVal = CGF.EmitScalarConversion( 10889 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10890 CounterVal->getExprLoc()); 10891 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10892 /*Volatile=*/false, Int64Ty); 10893 } 10894 llvm::Value *Args[] = { 10895 emitUpdateLocation(CGF, C->getBeginLoc()), 10896 getThreadID(CGF, C->getBeginLoc()), 10897 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10898 llvm::FunctionCallee RTLFn; 10899 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10900 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10901 } else { 10902 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10903 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10904 } 10905 CGF.EmitRuntimeCall(RTLFn, Args); 10906 } 10907 10908 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10909 llvm::FunctionCallee Callee, 10910 ArrayRef<llvm::Value *> Args) const { 10911 assert(Loc.isValid() && "Outlined function call location must be valid."); 10912 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10913 10914 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10915 if (Fn->doesNotThrow()) { 10916 CGF.EmitNounwindRuntimeCall(Fn, Args); 10917 return; 10918 } 10919 } 10920 CGF.EmitRuntimeCall(Callee, Args); 10921 } 10922 10923 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10924 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10925 ArrayRef<llvm::Value *> Args) const { 10926 emitCall(CGF, Loc, OutlinedFn, Args); 10927 } 10928 10929 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10930 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10931 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10932 HasEmittedDeclareTargetRegion = true; 10933 } 10934 10935 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10936 const VarDecl *NativeParam, 10937 const VarDecl *TargetParam) const { 10938 return CGF.GetAddrOfLocalVar(NativeParam); 10939 } 10940 10941 namespace { 10942 /// Cleanup action for allocate support. 10943 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10944 public: 10945 static const int CleanupArgs = 3; 10946 10947 private: 10948 llvm::FunctionCallee RTLFn; 10949 llvm::Value *Args[CleanupArgs]; 10950 10951 public: 10952 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10953 ArrayRef<llvm::Value *> CallArgs) 10954 : RTLFn(RTLFn) { 10955 assert(CallArgs.size() == CleanupArgs && 10956 "Size of arguments does not match."); 10957 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10958 } 10959 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10960 if (!CGF.HaveInsertPoint()) 10961 return; 10962 CGF.EmitRuntimeCall(RTLFn, Args); 10963 } 10964 }; 10965 } // namespace 10966 10967 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10968 const VarDecl *VD) { 10969 if (!VD) 10970 return Address::invalid(); 10971 const VarDecl *CVD = VD->getCanonicalDecl(); 10972 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10973 return Address::invalid(); 10974 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10975 // Use the default allocation. 10976 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10977 !AA->getAllocator()) 10978 return Address::invalid(); 10979 llvm::Value *Size; 10980 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10981 if (CVD->getType()->isVariablyModifiedType()) { 10982 Size = CGF.getTypeSize(CVD->getType()); 10983 // Align the size: ((size + align - 1) / align) * align 10984 Size = CGF.Builder.CreateNUWAdd( 10985 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10986 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10987 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10988 } else { 10989 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10990 Size = CGM.getSize(Sz.alignTo(Align)); 10991 } 10992 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10993 assert(AA->getAllocator() && 10994 "Expected allocator expression for non-default allocator."); 10995 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 10996 // According to the standard, the original allocator type is a enum (integer). 10997 // Convert to pointer type, if required. 10998 if (Allocator->getType()->isIntegerTy()) 10999 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11000 else if (Allocator->getType()->isPointerTy()) 11001 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11002 CGM.VoidPtrTy); 11003 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11004 11005 llvm::Value *Addr = 11006 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11007 CVD->getName() + ".void.addr"); 11008 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11009 Allocator}; 11010 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11011 11012 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11013 llvm::makeArrayRef(FiniArgs)); 11014 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11015 Addr, 11016 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11017 CVD->getName() + ".addr"); 11018 return Address(Addr, Align); 11019 } 11020 11021 /// Checks current context and returns true if it matches the context selector. 11022 template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, 11023 OMPDeclareVariantAttr::CtxSelectorType Ctx> 11024 static bool checkContext(const OMPDeclareVariantAttr *A) { 11025 assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown && 11026 Ctx != OMPDeclareVariantAttr::CtxUnknown && 11027 "Unknown context selector or context selector set."); 11028 return false; 11029 } 11030 11031 /// Checks for implementation={vendor(<vendor>)} context selector. 11032 /// \returns true iff <vendor>="llvm", false otherwise. 11033 template <> 11034 bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11035 OMPDeclareVariantAttr::CtxVendor>( 11036 const OMPDeclareVariantAttr *A) { 11037 return llvm::all_of(A->implVendors(), 11038 [](StringRef S) { return !S.compare_lower("llvm"); }); 11039 } 11040 11041 static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { 11042 // If both scores are unknown, choose the very first one. 11043 if (!LHS && !RHS) 11044 return true; 11045 // If only one is known, return this one. 11046 if (LHS && !RHS) 11047 return true; 11048 if (!LHS && RHS) 11049 return false; 11050 llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); 11051 llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); 11052 return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; 11053 } 11054 11055 namespace { 11056 /// Comparator for the priority queue for context selector. 11057 class OMPDeclareVariantAttrComparer 11058 : public std::greater<const OMPDeclareVariantAttr *> { 11059 private: 11060 ASTContext &Ctx; 11061 11062 public: 11063 OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {} 11064 bool operator()(const OMPDeclareVariantAttr *LHS, 11065 const OMPDeclareVariantAttr *RHS) const { 11066 const Expr *LHSExpr = nullptr; 11067 const Expr *RHSExpr = nullptr; 11068 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11069 LHSExpr = LHS->getScore(); 11070 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11071 RHSExpr = RHS->getScore(); 11072 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11073 } 11074 }; 11075 } // anonymous namespace 11076 11077 /// Finds the variant function that matches current context with its context 11078 /// selector. 11079 static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, 11080 const FunctionDecl *FD) { 11081 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11082 return FD; 11083 // Iterate through all DeclareVariant attributes and check context selectors. 11084 auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS, 11085 const OMPDeclareVariantAttr *RHS) { 11086 const Expr *LHSExpr = nullptr; 11087 const Expr *RHSExpr = nullptr; 11088 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11089 LHSExpr = LHS->getScore(); 11090 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11091 RHSExpr = RHS->getScore(); 11092 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11093 }; 11094 const OMPDeclareVariantAttr *TopMostAttr = nullptr; 11095 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11096 const OMPDeclareVariantAttr *SelectedAttr = nullptr; 11097 switch (A->getCtxSelectorSet()) { 11098 case OMPDeclareVariantAttr::CtxSetImplementation: 11099 switch (A->getCtxSelector()) { 11100 case OMPDeclareVariantAttr::CtxVendor: 11101 if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11102 OMPDeclareVariantAttr::CtxVendor>(A)) 11103 SelectedAttr = A; 11104 break; 11105 case OMPDeclareVariantAttr::CtxUnknown: 11106 llvm_unreachable( 11107 "Unknown context selector in implementation selector set."); 11108 } 11109 break; 11110 case OMPDeclareVariantAttr::CtxSetUnknown: 11111 llvm_unreachable("Unknown context selector set."); 11112 } 11113 // If the attribute matches the context, find the attribute with the highest 11114 // score. 11115 if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) 11116 TopMostAttr = SelectedAttr; 11117 } 11118 if (!TopMostAttr) 11119 return FD; 11120 return cast<FunctionDecl>( 11121 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) 11122 ->getDecl()); 11123 } 11124 11125 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11126 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11127 // If the original function is defined already, use its definition. 11128 StringRef MangledName = CGM.getMangledName(GD); 11129 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11130 if (Orig && !Orig->isDeclaration()) 11131 return false; 11132 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D); 11133 // Emit original function if it does not have declare variant attribute or the 11134 // context does not match. 11135 if (NewFD == D) 11136 return false; 11137 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11138 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11139 DeferredVariantFunction.erase(D); 11140 return true; 11141 } 11142 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11143 return true; 11144 } 11145 11146 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11147 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11148 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11149 llvm_unreachable("Not supported in SIMD-only mode"); 11150 } 11151 11152 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11153 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11154 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11155 llvm_unreachable("Not supported in SIMD-only mode"); 11156 } 11157 11158 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11159 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11160 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11161 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11162 bool Tied, unsigned &NumberOfParts) { 11163 llvm_unreachable("Not supported in SIMD-only mode"); 11164 } 11165 11166 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11167 SourceLocation Loc, 11168 llvm::Function *OutlinedFn, 11169 ArrayRef<llvm::Value *> CapturedVars, 11170 const Expr *IfCond) { 11171 llvm_unreachable("Not supported in SIMD-only mode"); 11172 } 11173 11174 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11175 CodeGenFunction &CGF, StringRef CriticalName, 11176 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11177 const Expr *Hint) { 11178 llvm_unreachable("Not supported in SIMD-only mode"); 11179 } 11180 11181 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11182 const RegionCodeGenTy &MasterOpGen, 11183 SourceLocation Loc) { 11184 llvm_unreachable("Not supported in SIMD-only mode"); 11185 } 11186 11187 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11188 SourceLocation Loc) { 11189 llvm_unreachable("Not supported in SIMD-only mode"); 11190 } 11191 11192 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11193 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11194 SourceLocation Loc) { 11195 llvm_unreachable("Not supported in SIMD-only mode"); 11196 } 11197 11198 void CGOpenMPSIMDRuntime::emitSingleRegion( 11199 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11200 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11201 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11202 ArrayRef<const Expr *> AssignmentOps) { 11203 llvm_unreachable("Not supported in SIMD-only mode"); 11204 } 11205 11206 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11207 const RegionCodeGenTy &OrderedOpGen, 11208 SourceLocation Loc, 11209 bool IsThreads) { 11210 llvm_unreachable("Not supported in SIMD-only mode"); 11211 } 11212 11213 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11214 SourceLocation Loc, 11215 OpenMPDirectiveKind Kind, 11216 bool EmitChecks, 11217 bool ForceSimpleCall) { 11218 llvm_unreachable("Not supported in SIMD-only mode"); 11219 } 11220 11221 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11222 CodeGenFunction &CGF, SourceLocation Loc, 11223 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11224 bool Ordered, const DispatchRTInput &DispatchValues) { 11225 llvm_unreachable("Not supported in SIMD-only mode"); 11226 } 11227 11228 void CGOpenMPSIMDRuntime::emitForStaticInit( 11229 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11230 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11231 llvm_unreachable("Not supported in SIMD-only mode"); 11232 } 11233 11234 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11235 CodeGenFunction &CGF, SourceLocation Loc, 11236 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11237 llvm_unreachable("Not supported in SIMD-only mode"); 11238 } 11239 11240 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11241 SourceLocation Loc, 11242 unsigned IVSize, 11243 bool IVSigned) { 11244 llvm_unreachable("Not supported in SIMD-only mode"); 11245 } 11246 11247 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11248 SourceLocation Loc, 11249 OpenMPDirectiveKind DKind) { 11250 llvm_unreachable("Not supported in SIMD-only mode"); 11251 } 11252 11253 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11254 SourceLocation Loc, 11255 unsigned IVSize, bool IVSigned, 11256 Address IL, Address LB, 11257 Address UB, Address ST) { 11258 llvm_unreachable("Not supported in SIMD-only mode"); 11259 } 11260 11261 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11262 llvm::Value *NumThreads, 11263 SourceLocation Loc) { 11264 llvm_unreachable("Not supported in SIMD-only mode"); 11265 } 11266 11267 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11268 OpenMPProcBindClauseKind ProcBind, 11269 SourceLocation Loc) { 11270 llvm_unreachable("Not supported in SIMD-only mode"); 11271 } 11272 11273 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11274 const VarDecl *VD, 11275 Address VDAddr, 11276 SourceLocation Loc) { 11277 llvm_unreachable("Not supported in SIMD-only mode"); 11278 } 11279 11280 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11281 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11282 CodeGenFunction *CGF) { 11283 llvm_unreachable("Not supported in SIMD-only mode"); 11284 } 11285 11286 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11287 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11288 llvm_unreachable("Not supported in SIMD-only mode"); 11289 } 11290 11291 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11292 ArrayRef<const Expr *> Vars, 11293 SourceLocation Loc) { 11294 llvm_unreachable("Not supported in SIMD-only mode"); 11295 } 11296 11297 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11298 const OMPExecutableDirective &D, 11299 llvm::Function *TaskFunction, 11300 QualType SharedsTy, Address Shareds, 11301 const Expr *IfCond, 11302 const OMPTaskDataTy &Data) { 11303 llvm_unreachable("Not supported in SIMD-only mode"); 11304 } 11305 11306 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11307 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11308 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11309 const Expr *IfCond, const OMPTaskDataTy &Data) { 11310 llvm_unreachable("Not supported in SIMD-only mode"); 11311 } 11312 11313 void CGOpenMPSIMDRuntime::emitReduction( 11314 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11315 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11316 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11317 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11318 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11319 ReductionOps, Options); 11320 } 11321 11322 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11323 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11324 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11325 llvm_unreachable("Not supported in SIMD-only mode"); 11326 } 11327 11328 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11329 SourceLocation Loc, 11330 ReductionCodeGen &RCG, 11331 unsigned N) { 11332 llvm_unreachable("Not supported in SIMD-only mode"); 11333 } 11334 11335 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11336 SourceLocation Loc, 11337 llvm::Value *ReductionsPtr, 11338 LValue SharedLVal) { 11339 llvm_unreachable("Not supported in SIMD-only mode"); 11340 } 11341 11342 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11343 SourceLocation Loc) { 11344 llvm_unreachable("Not supported in SIMD-only mode"); 11345 } 11346 11347 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11348 CodeGenFunction &CGF, SourceLocation Loc, 11349 OpenMPDirectiveKind CancelRegion) { 11350 llvm_unreachable("Not supported in SIMD-only mode"); 11351 } 11352 11353 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11354 SourceLocation Loc, const Expr *IfCond, 11355 OpenMPDirectiveKind CancelRegion) { 11356 llvm_unreachable("Not supported in SIMD-only mode"); 11357 } 11358 11359 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11360 const OMPExecutableDirective &D, StringRef ParentName, 11361 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11362 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11363 llvm_unreachable("Not supported in SIMD-only mode"); 11364 } 11365 11366 void CGOpenMPSIMDRuntime::emitTargetCall( 11367 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11368 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11369 const Expr *Device, 11370 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11371 const OMPLoopDirective &D)> 11372 SizeEmitter) { 11373 llvm_unreachable("Not supported in SIMD-only mode"); 11374 } 11375 11376 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11377 llvm_unreachable("Not supported in SIMD-only mode"); 11378 } 11379 11380 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11381 llvm_unreachable("Not supported in SIMD-only mode"); 11382 } 11383 11384 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11385 return false; 11386 } 11387 11388 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11389 const OMPExecutableDirective &D, 11390 SourceLocation Loc, 11391 llvm::Function *OutlinedFn, 11392 ArrayRef<llvm::Value *> CapturedVars) { 11393 llvm_unreachable("Not supported in SIMD-only mode"); 11394 } 11395 11396 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11397 const Expr *NumTeams, 11398 const Expr *ThreadLimit, 11399 SourceLocation Loc) { 11400 llvm_unreachable("Not supported in SIMD-only mode"); 11401 } 11402 11403 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11404 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11405 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11406 llvm_unreachable("Not supported in SIMD-only mode"); 11407 } 11408 11409 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11410 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11411 const Expr *Device) { 11412 llvm_unreachable("Not supported in SIMD-only mode"); 11413 } 11414 11415 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11416 const OMPLoopDirective &D, 11417 ArrayRef<Expr *> NumIterations) { 11418 llvm_unreachable("Not supported in SIMD-only mode"); 11419 } 11420 11421 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11422 const OMPDependClause *C) { 11423 llvm_unreachable("Not supported in SIMD-only mode"); 11424 } 11425 11426 const VarDecl * 11427 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11428 const VarDecl *NativeParam) const { 11429 llvm_unreachable("Not supported in SIMD-only mode"); 11430 } 11431 11432 Address 11433 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11434 const VarDecl *NativeParam, 11435 const VarDecl *TargetParam) const { 11436 llvm_unreachable("Not supported in SIMD-only mode"); 11437 } 11438