//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
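  /// Action that emits the part-id dispatch for an untied task. The generated
  /// task entry roughly looks like this (illustrative sketch, not normative):
  ///   switch (*part_id) { case 0: goto part0; case 1: goto part1; ... }
  ///   part0:
  ///     ... code up to the first task scheduling point ...
  ///     *part_id = 1; /* re-enqueue self */ return;
  ///   part1:
  ///     ...
  /// Enter() emits the switch; each emitUntiedSwitch() call adds the next
  /// re-enqueue point and the matching switch case.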
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
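  /// Typical use (illustrative sketch): construct the RAII object on the
  /// stack, emit the inlined region body, and let the destructor restore the
  /// previous capture state, e.g.
  ///   {
  ///     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
  ///                                    /*HasCancel=*/false);
  ///     CGF.EmitStmt(Body);
  ///   } // previous CapturedStmtInfo restored here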
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
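  // Note: the modifier bits above are ORed into a base schedule value, e.g.
  // 'schedule(nonmonotonic: dynamic)' is encoded roughly as
  // (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic). (Illustrative.)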
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
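/// (Illustrative: for 'reduction(myop : x)' the reduction op is a CallExpr
/// whose callee is an OpaqueValueExpr wrapping a DeclRefExpr to the
/// OMPDeclareReductionDecl, which is exactly the pattern matched below.)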
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
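/// The emitted initialization has the shape of a while-do loop over the
/// elements (illustrative sketch):
///   dest = DestBegin; end = DestBegin + NumElements;
///   if (dest == end) goto done;
///   body: init(*dest); ++dest; (and ++src for UDR initializers)
///   if (dest != end) goto body;
///   done: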
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' had been given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

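// Note: the helper emitted below follows the usual kmpc microtask convention;
// its signature is roughly (illustrative, names are not normative):
//   void <helper>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                 <captured variables>...);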
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1207 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1208 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1209 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1210 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1211 assert(ThreadIDVar->getType()->isPointerType() &&
1212 "thread id variable must be of type kmp_int32 *");
1213 CodeGenFunction CGF(CGM, true);
1214 bool HasCancel = false;
1215 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1216 HasCancel = OPD->hasCancel();
1217 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1218 HasCancel = OPD->hasCancel();
1219 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1220 HasCancel = OPSD->hasCancel();
1221 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1222 HasCancel = OPFD->hasCancel();
1223 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1224 HasCancel = OPFD->hasCancel();
1225 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1226 HasCancel = OPFD->hasCancel();
1227 else if (const auto *OPFD =
1228 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1229 HasCancel = OPFD->hasCancel();
1230 else if (const auto *OPFD =
1231 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1232 HasCancel = OPFD->hasCancel();
1233
1234 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1235 // parallel region to make cancellation barriers work properly.
1236 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1237 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1238 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1239 HasCancel, OutlinedHelperName);
1240 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1241 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1242 }
1243
getOutlinedHelperName(StringRef Name) const1244 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1245 std::string Suffix = getName({"omp_outlined"});
1246 return (Name + Suffix).str();
1247 }
1248
getOutlinedHelperName(CodeGenFunction & CGF) const1249 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1250 return getOutlinedHelperName(CGF.CurFn->getName());
1251 }
1252
getReductionFuncName(StringRef Name) const1253 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1254 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1255 return (Name + Suffix).str();
1256 }
1257
emitParallelOutlinedFunction(CodeGenFunction & CGF,const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1258 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1259 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1260 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1261 const RegionCodeGenTy &CodeGen) {
1262 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1263 return emitParallelOrTeamsOutlinedFunction(
1264 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1265 CodeGen);
1266 }
1267
emitTeamsOutlinedFunction(CodeGenFunction & CGF,const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1268 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1269 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1275 CodeGen);
1276 }
1277
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const VarDecl * PartIDVar,const VarDecl * TaskTVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,bool Tied,unsigned & NumberOfParts)1278 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1279 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1280 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1281 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1282 bool Tied, unsigned &NumberOfParts) {
1283 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1284 PrePostActionTy &) {
1285 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1286 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1287 llvm::Value *TaskArgs[] = {
1288 UpLoc, ThreadID,
1289 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1290 TaskTVar->getType()->castAs<PointerType>())
1291 .getPointer(CGF)};
1292 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1293 CGM.getModule(), OMPRTL___kmpc_omp_task),
1294 TaskArgs);
1295 };
1296 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1297 UntiedCodeGen);
1298 CodeGen.setAction(Action);
1299 assert(!ThreadIDVar->getType()->isPointerType() &&
1300 "thread id variable must be of type kmp_int32 for tasks");
1301 const OpenMPDirectiveKind Region =
1302 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1303 : OMPD_task;
1304 const CapturedStmt *CS = D.getCapturedStmt(Region);
1305 bool HasCancel = false;
1306 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1307 HasCancel = TD->hasCancel();
1308 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1309 HasCancel = TD->hasCancel();
1310 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1311 HasCancel = TD->hasCancel();
1312 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1313 HasCancel = TD->hasCancel();
1314
1315 CodeGenFunction CGF(CGM, true);
1316 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1317 InnermostKind, HasCancel, Action);
1318 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1319 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1320 if (!Tied)
1321 NumberOfParts = Action.getNumberOfParts();
1322 return Res;
1323 }
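// Hedged sketch of the untied-task shape produced above: the task entry is
// split into parts keyed by PartIDVar, and after finishing a part the task
// re-enqueues itself via __kmpc_omp_task so it can resume later, conceptually:
//   switch (*part_id) {
//   case 0:
//     ...part 0...; *part_id = 1;
//     __kmpc_omp_task(&loc, gtid, task); return; // re-enqueue self
//   case 1:
//     ...part 1...; ...
//   }
// For untied tasks, NumberOfParts reports how many such parts were created.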
1324
1325 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1326 bool AtCurrentPoint) {
1327 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1328 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1329
1330 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1331 if (AtCurrentPoint) {
1332 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1333 CGF.Builder.GetInsertBlock());
1334 } else {
1335 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1336 Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1337 }
1338 }
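// The service insertion point created above is a no-op placeholder
// instruction, e.g. in IR (sketch):
//   %svcpt = bitcast i32 undef to i32
// It only marks where runtime service calls (such as the cached thread-id
// call) should be inserted; clearLocThreadIdInsertPt below erases it again.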
1339
1340 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1341 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1342 if (Elem.ServiceInsertPt) {
1343 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1344 Elem.ServiceInsertPt = nullptr;
1345 Ptr->eraseFromParent();
1346 }
1347 }
1348
1349 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1350 SourceLocation Loc,
1351 SmallString<128> &Buffer) {
1352 llvm::raw_svector_ostream OS(Buffer);
1353 // Build debug location
1354 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1355 OS << ";";
1356 if (auto *DbgInfo = CGF.getDebugInfo())
1357 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1358 else
1359 OS << PLoc.getFilename();
1360 OS << ";";
1361 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1362 OS << FD->getQualifiedNameAsString();
1363 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1364 return OS.str();
1365 }
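// Example of the resulting ident string (fields are ';'-separated, with empty
// leading and trailing fields) for a location at line 3, column 9 of t.c
// inside function 'main':
//   ";t.c;main;3;9;;"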
1366
1367 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1368 SourceLocation Loc,
1369 unsigned Flags, bool EmitLoc) {
1370 uint32_t SrcLocStrSize;
1371 llvm::Constant *SrcLocStr;
1372 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1373 llvm::codegenoptions::NoDebugInfo) ||
1374 Loc.isInvalid()) {
1375 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1376 } else {
1377 std::string FunctionName;
1378 std::string FileName;
1379 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1380 FunctionName = FD->getQualifiedNameAsString();
1381 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1382 if (auto *DbgInfo = CGF.getDebugInfo())
1383 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1384 else
1385 FileName = PLoc.getFilename();
1386 unsigned Line = PLoc.getLine();
1387 unsigned Column = PLoc.getColumn();
1388 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1389 Column, SrcLocStrSize);
1390 }
1391 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1392 return OMPBuilder.getOrCreateIdent(
1393 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1394 }
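// The returned value is a pointer to an ident_t global. As a hedged
// reference, the runtime's layout of ident_t is essentially:
//   typedef struct ident {
//     kmp_int32 reserved_1;
//     kmp_int32 flags;     // OMP_IDENT_* flags, e.g. barrier/worksharing kind
//     kmp_int32 reserved_2;
//     kmp_int32 reserved_3;
//     char const *psource; // the ";file;function;line;column;;" string
//   } ident_t;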
1395
1396 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1397 SourceLocation Loc) {
1398 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1399 // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
1400 // as the clang invariants used below might be broken.
1401 if (CGM.getLangOpts().OpenMPIRBuilder) {
1402 SmallString<128> Buffer;
1403 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1404 uint32_t SrcLocStrSize;
1405 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1406 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1407 return OMPBuilder.getOrCreateThreadID(
1408 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1409 }
1410
1411 llvm::Value *ThreadID = nullptr;
1412 // Check whether we've already cached a load of the thread id in this
1413 // function.
1414 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1415 if (I != OpenMPLocThreadIDMap.end()) {
1416 ThreadID = I->second.ThreadID;
1417 if (ThreadID != nullptr)
1418 return ThreadID;
1419 }
1420 // If exceptions are enabled, do not use the parameter to avoid a possible crash.
1421 if (auto *OMPRegionInfo =
1422 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1423 if (OMPRegionInfo->getThreadIDVariable()) {
1424 // Check if this is an outlined function with the thread id passed as an argument.
1425 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1426 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1427 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1428 !CGF.getLangOpts().CXXExceptions ||
1429 CGF.Builder.GetInsertBlock() == TopBlock ||
1430 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1431 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1432 TopBlock ||
1433 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1434 CGF.Builder.GetInsertBlock()) {
1435 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1436 // If the value was loaded in the entry block, cache it and use it everywhere
1437 // in the function.
1438 if (CGF.Builder.GetInsertBlock() == TopBlock)
1439 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1440 return ThreadID;
1441 }
1442 }
1443 }
1444
1445 // This is not an outlined function region - need to call kmp_int32
1446 // __kmpc_global_thread_num(ident_t *loc).
1447 // Generate thread id value and cache this value for use across the
1448 // function.
1449 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1450 if (!Elem.ServiceInsertPt)
1451 setLocThreadIdInsertPt(CGF);
1452 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1453 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1454 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1455 llvm::CallInst *Call = CGF.Builder.CreateCall(
1456 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1457 OMPRTL___kmpc_global_thread_num),
1458 emitUpdateLocation(CGF, Loc));
1459 Call->setCallingConv(CGF.getRuntimeCC());
1460 Elem.ThreadID = Call;
1461 return Call;
1462 }
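// Sketch of the serial-code path above: a single IR call
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// is emitted at the service insertion point and cached in
// OpenMPLocThreadIDMap, so subsequent getThreadID queries in the same
// function reuse %gtid instead of emitting another runtime call.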
1463
1464 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1465 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1466 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1467 clearLocThreadIdInsertPt(CGF);
1468 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1469 }
1470 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1471 for (const auto *D : I->second)
1472 UDRMap.erase(D);
1473 FunctionUDRMap.erase(I);
1474 }
1475 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1476 for (const auto *D : I->second)
1477 UDMMap.erase(D);
1478 FunctionUDMMap.erase(I);
1479 }
1480 LastprivateConditionalToTypes.erase(CGF.CurFn);
1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483
1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1485 return OMPBuilder.IdentPtr;
1486 }
1487
1488 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1489 convertDeviceClause(const VarDecl *VD) {
1490 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1491 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1492 if (!DevTy)
1493 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1494
1495 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1496 case OMPDeclareTargetDeclAttr::DT_Host:
1497 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1498 break;
1499 case OMPDeclareTargetDeclAttr::DT_NoHost:
1500 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1501 break;
1502 case OMPDeclareTargetDeclAttr::DT_Any:
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1504 break;
1505 default:
1506 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1507 break;
1508 }
1509 }
1510
1511 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1512 convertCaptureClause(const VarDecl *VD) {
1513 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1514 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1515 if (!MapType)
1516 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1517 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1518 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1519 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1520 break;
1521 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1522 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1523 break;
1524 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1525 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1526 break;
1527 default:
1528 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1529 break;
1530 }
1531 }
1532
1533 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1534 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1535 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1536
1537 auto FileInfoCallBack = [&]() {
1538 SourceManager &SM = CGM.getContext().getSourceManager();
1539 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1540
1541 llvm::sys::fs::UniqueID ID;
1542 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1543 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1544 }
1545
1546 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1547 };
1548
1549 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1550 }
1551
1552 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1553 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1554
1555 auto LinkageForVariable = [&VD, this]() {
1556 return CGM.getLLVMLinkageVarDefinition(VD);
1557 };
1558
1559 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1560
1561 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1562 CGM.getContext().getPointerType(VD->getType()));
1563 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1564 convertCaptureClause(VD), convertDeviceClause(VD),
1565 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1566 VD->isExternallyVisible(),
1567 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1568 VD->getCanonicalDecl()->getBeginLoc()),
1569 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1570 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1571 LinkageForVariable);
1572
1573 if (!addr)
1574 return ConstantAddress::invalid();
1575 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1576 }
1577
1578 llvm::Constant *
1579 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1580 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1581 !CGM.getContext().getTargetInfo().isTLSSupported());
1582 // Lookup the entry, lazily creating it if necessary.
1583 std::string Suffix = getName({"cache", ""});
1584 return OMPBuilder.getOrCreateInternalVariable(
1585 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1586 }
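// Naming sketch (assuming the default '.' separators): for a threadprivate
// variable whose mangled name is "x", the internal cache variable is named
// roughly "x.cache.".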
1587
1588 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1589 const VarDecl *VD,
1590 Address VDAddr,
1591 SourceLocation Loc) {
1592 if (CGM.getLangOpts().OpenMPUseTLS &&
1593 CGM.getContext().getTargetInfo().isTLSSupported())
1594 return VDAddr;
1595
1596 llvm::Type *VarTy = VDAddr.getElementType();
1597 llvm::Value *Args[] = {
1598 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1599 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1600 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1601 getOrCreateThreadPrivateCache(VD)};
1602 return Address(
1603 CGF.EmitRuntimeCall(
1604 OMPBuilder.getOrCreateRuntimeFunction(
1605 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1606 Args),
1607 CGF.Int8Ty, VDAddr.getAlignment());
1608 }
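// Hedged C-level sketch of the call built above for a threadprivate 'T x'
// (here x_cache stands for the internal per-variable cache created by
// getOrCreateThreadPrivateCache):
//   void *copy = __kmpc_threadprivate_cached(&loc, gtid, (void *)&x,
//                                            sizeof(T), &x_cache);
// The runtime returns the address of the calling thread's private copy,
// creating it on first use.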
1609
1610 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1611 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1612 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1613 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1614 // library.
1615 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1616 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1617 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1618 OMPLoc);
1619 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1620 // to register constructor/destructor for variable.
1621 llvm::Value *Args[] = {
1622 OMPLoc,
1623 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1624 Ctor, CopyCtor, Dtor};
1625 CGF.EmitRuntimeCall(
1626 OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628 Args);
1629 }
1630
1631 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1632 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1633 bool PerformInit, CodeGenFunction *CGF) {
1634 if (CGM.getLangOpts().OpenMPUseTLS &&
1635 CGM.getContext().getTargetInfo().isTLSSupported())
1636 return nullptr;
1637
1638 VD = VD->getDefinition(CGM.getContext());
1639 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1640 QualType ASTTy = VD->getType();
1641
1642 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1643 const Expr *Init = VD->getAnyInitializer();
1644 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1645 // Generate a function that re-emits the declaration's initializer into the
1646 // threadprivate copy of the variable VD.
1647 CodeGenFunction CtorCGF(CGM);
1648 FunctionArgList Args;
1649 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1650 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1651 ImplicitParamKind::Other);
1652 Args.push_back(&Dst);
1653
1654 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1655 CGM.getContext().VoidPtrTy, Args);
1656 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1657 std::string Name = getName({"__kmpc_global_ctor_", ""});
1658 llvm::Function *Fn =
1659 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1660 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1661 Args, Loc, Loc);
1662 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1663 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1664 CGM.getContext().VoidPtrTy, Dst.getLocation());
1665 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1666 VDAddr.getAlignment());
1667 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1668 /*IsInitializer=*/true);
1669 ArgVal = CtorCGF.EmitLoadOfScalar(
1670 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671 CGM.getContext().VoidPtrTy, Dst.getLocation());
1672 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1673 CtorCGF.FinishFunction();
1674 Ctor = Fn;
1675 }
1676 if (VD->getType().isDestructedType() != QualType::DK_none) {
1677 // Generate a function that emits the destructor call for the threadprivate
1678 // copy of the variable VD.
1679 CodeGenFunction DtorCGF(CGM);
1680 FunctionArgList Args;
1681 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1682 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1683 ImplicitParamKind::Other);
1684 Args.push_back(&Dst);
1685
1686 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1687 CGM.getContext().VoidTy, Args);
1688 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1689 std::string Name = getName({"__kmpc_global_dtor_", ""});
1690 llvm::Function *Fn =
1691 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1692 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1693 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1694 Loc, Loc);
1695 // Create a scope with an artificial location for the body of this function.
1696 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1697 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1698 DtorCGF.GetAddrOfLocalVar(&Dst),
1699 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1700 DtorCGF.emitDestroy(
1701 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1702 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1703 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1704 DtorCGF.FinishFunction();
1705 Dtor = Fn;
1706 }
1707 // Do not emit init function if it is not required.
1708 if (!Ctor && !Dtor)
1709 return nullptr;
1710
1711 // Copying constructor for the threadprivate variable.
1712 // Must be NULL: reserved by the runtime, which currently requires that this
1713 // parameter always be NULL. Otherwise it fires an assertion.
1714 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1715 if (Ctor == nullptr) {
1716 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1717 }
1718 if (Dtor == nullptr) {
1719 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1720 }
1721 if (!CGF) {
1722 auto *InitFunctionTy =
1723 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1724 std::string Name = getName({"__omp_threadprivate_init_", ""});
1725 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1726 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1727 CodeGenFunction InitCGF(CGM);
1728 FunctionArgList ArgList;
1729 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1730 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1731 Loc, Loc);
1732 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1733 InitCGF.FinishFunction();
1734 return InitFunction;
1735 }
1736 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1737 }
1738 return nullptr;
1739 }
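// Hedged sketch of what the code above produces for a C++ threadprivate
// 'static T x = init;' that needs construction and destruction:
//   void *__kmpc_global_ctor_(void *p) { new (p) T(init); return p; }
//   void  __kmpc_global_dtor_(void *p) { ((T *)p)->~T(); }
//   void  __omp_threadprivate_init_(void) {
//     __kmpc_global_thread_num(&loc); // make sure the runtime is initialized
//     __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/NULL, dtor);
//   }
// (Names abbreviated; the copy-constructor slot is reserved and must be NULL,
// as noted above.)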
1740
1741 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1742 llvm::GlobalValue *GV) {
1743 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1744 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1745
1746 // We only need to handle active 'indirect' declare target functions.
1747 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1748 return;
1749
1750 // Get a mangled name to store the new device global in.
1751 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1752 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1753 SmallString<128> Name;
1754 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1755
1756 // We need to generate a new global to hold the address of the indirectly
1757 // called device function. Doing this allows us to keep the visibility and
1758 // linkage of the associated function unchanged while allowing the runtime to
1759 // access its value.
1760 llvm::GlobalValue *Addr = GV;
1761 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1762 Addr = new llvm::GlobalVariable(
1763 CGM.getModule(), CGM.VoidPtrTy,
1764 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1765 nullptr, llvm::GlobalValue::NotThreadLocal,
1766 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1767 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1768 }
1769
1770 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1771 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1772 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1773 llvm::GlobalValue::WeakODRLinkage);
1774 }
1775
1776 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1777 QualType VarType,
1778 StringRef Name) {
1779 std::string Suffix = getName({"artificial", ""});
1780 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1781 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1782 VarLVType, Twine(Name).concat(Suffix).str());
1783 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1784 CGM.getTarget().isTLSSupported()) {
1785 GAddr->setThreadLocal(/*Val=*/true);
1786 return Address(GAddr, GAddr->getValueType(),
1787 CGM.getContext().getTypeAlignInChars(VarType));
1788 }
1789 std::string CacheSuffix = getName({"cache", ""});
1790 llvm::Value *Args[] = {
1791 emitUpdateLocation(CGF, SourceLocation()),
1792 getThreadID(CGF, SourceLocation()),
1793 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1794 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1795 /*isSigned=*/false),
1796 OMPBuilder.getOrCreateInternalVariable(
1797 CGM.VoidPtrPtrTy,
1798 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1799 return Address(
1800 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1801 CGF.EmitRuntimeCall(
1802 OMPBuilder.getOrCreateRuntimeFunction(
1803 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1804 Args),
1805 CGF.Builder.getPtrTy(0)),
1806 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1807 }
1808
1809 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1810 const RegionCodeGenTy &ThenGen,
1811 const RegionCodeGenTy &ElseGen) {
1812 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1813
1814 // If the condition constant folds and can be elided, try to avoid emitting
1815 // the condition and the dead arm of the if/else.
1816 bool CondConstant;
1817 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1818 if (CondConstant)
1819 ThenGen(CGF);
1820 else
1821 ElseGen(CGF);
1822 return;
1823 }
1824
1825 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1826 // emit the conditional branch.
1827 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1828 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1829 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1830 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1831
1832 // Emit the 'then' code.
1833 CGF.EmitBlock(ThenBlock);
1834 ThenGen(CGF);
1835 CGF.EmitBranch(ContBlock);
1836 // Emit the 'else' code if present.
1837 // There is no need to emit line number for unconditional branch.
1838 (void)ApplyDebugLocation::CreateEmpty(CGF);
1839 CGF.EmitBlock(ElseBlock);
1840 ElseGen(CGF);
1841 // There is no need to emit line number for unconditional branch.
1842 (void)ApplyDebugLocation::CreateEmpty(CGF);
1843 CGF.EmitBranch(ContBlock);
1844 // Emit the continuation block for code after the if.
1845 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1846 }
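// Sketch of the control flow emitted for a non-foldable condition:
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:
//   ...ThenGen...; br label %omp_if.end
// omp_if.else:
//   ...ElseGen...; br label %omp_if.end
// omp_if.end: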
1847
1848 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1849 llvm::Function *OutlinedFn,
1850 ArrayRef<llvm::Value *> CapturedVars,
1851 const Expr *IfCond,
1852 llvm::Value *NumThreads) {
1853 if (!CGF.HaveInsertPoint())
1854 return;
1855 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1856 auto &M = CGM.getModule();
1857 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1858 this](CodeGenFunction &CGF, PrePostActionTy &) {
1859 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1860 llvm::Value *Args[] = {
1861 RTLoc,
1862 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1863 OutlinedFn};
1864 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1865 RealArgs.append(std::begin(Args), std::end(Args));
1866 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1867
1868 llvm::FunctionCallee RTLFn =
1869 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1870 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1871 };
1872 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1873 this](CodeGenFunction &CGF, PrePostActionTy &) {
1874 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1875 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1876 // Build calls:
1877 // __kmpc_serialized_parallel(&Loc, GTid);
1878 llvm::Value *Args[] = {RTLoc, ThreadID};
1879 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1880 M, OMPRTL___kmpc_serialized_parallel),
1881 Args);
1882
1883 // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
1884 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1885 RawAddress ZeroAddrBound =
1886 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1887 /*Name=*/".bound.zero.addr");
1888 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1889 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1890 // ThreadId for serialized parallels is 0.
1891 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1892 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1893 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1894
1895 // Ensure we do not inline the function. This is trivially true for the ones
1896 // passed to __kmpc_fork_call, but the ones called in serialized regions
1897 // could be inlined. This is not perfect, but it is closer to the invariant
1898 // we want, namely, every data environment starts with a new function.
1899 // TODO: We should pass the if condition to the runtime function and do the
1900 // handling there. Much cleaner code.
1901 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1902 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1903 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1904
1905 // __kmpc_end_serialized_parallel(&Loc, GTid);
1906 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1907 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1908 M, OMPRTL___kmpc_end_serialized_parallel),
1909 EndArgs);
1910 };
1911 if (IfCond) {
1912 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1913 } else {
1914 RegionCodeGenTy ThenRCG(ThenGen);
1915 ThenRCG(CGF);
1916 }
1917 }
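// Hedged C-level sketch of the lowering above for
//   #pragma omp parallel if(cond)
// with N captured variables v1..vN:
//   if (cond) {
//     __kmpc_fork_call(&loc, N, omp_outlined, v1, ..., vN);
//   } else {
//     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero = 0;
//     omp_outlined(&gtid, &zero, v1, ..., vN); // kept out-of-line (NoInline)
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }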
1918
1919 // If we're inside an (outlined) parallel region, use the region info's
1920 // thread-ID variable (it is passed as the first argument of the outlined
1921 // function, as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1922 // region but in a regular serial code region, get the thread ID by calling
1923 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary,
1924 // and return the address of that temp.
1925 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1926 SourceLocation Loc) {
1927 if (auto *OMPRegionInfo =
1928 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1929 if (OMPRegionInfo->getThreadIDVariable())
1930 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931
1932 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1933 QualType Int32Ty =
1934 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1936 CGF.EmitStoreOfScalar(ThreadID,
1937 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1938
1939 return ThreadIDTemp;
1940 }
1941
1942 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1943 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1944 std::string Name = getName({Prefix, "var"});
1945 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1946 }
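// Naming sketch (assuming the default '.' separators):
//   #pragma omp critical(foo)
// uses a lock variable named roughly ".gomp_critical_user_foo.var", of type
// KmpCriticalNameTy.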
1947
1948 namespace {
1949 /// Common pre(post)-action for different OpenMP constructs.
1950 class CommonActionTy final : public PrePostActionTy {
1951 llvm::FunctionCallee EnterCallee;
1952 ArrayRef<llvm::Value *> EnterArgs;
1953 llvm::FunctionCallee ExitCallee;
1954 ArrayRef<llvm::Value *> ExitArgs;
1955 bool Conditional;
1956 llvm::BasicBlock *ContBlock = nullptr;
1957
1958 public:
1959 CommonActionTy(llvm::FunctionCallee EnterCallee,
1960 ArrayRef<llvm::Value *> EnterArgs,
1961 llvm::FunctionCallee ExitCallee,
1962 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1963 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1964 ExitArgs(ExitArgs), Conditional(Conditional) {}
1965 void Enter(CodeGenFunction &CGF) override {
1966 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1967 if (Conditional) {
1968 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1969 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1970 ContBlock = CGF.createBasicBlock("omp_if.end");
1971 // Generate the branch (If-stmt)
1972 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1973 CGF.EmitBlock(ThenBlock);
1974 }
1975 }
1976 void Done(CodeGenFunction &CGF) {
1977 // Emit the rest of blocks/branches
1978 CGF.EmitBranch(ContBlock);
1979 CGF.EmitBlock(ContBlock, true);
1980 }
1981 void Exit(CodeGenFunction &CGF) override {
1982 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1983 }
1984 };
1985 } // anonymous namespace
1986
1987 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1988 StringRef CriticalName,
1989 const RegionCodeGenTy &CriticalOpGen,
1990 SourceLocation Loc, const Expr *Hint) {
1991 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1992 // CriticalOpGen();
1993 // __kmpc_end_critical(ident_t *, gtid, Lock);
1994 // Prepare arguments and build a call to __kmpc_critical
1995 if (!CGF.HaveInsertPoint())
1996 return;
1997 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1998 getCriticalRegionLock(CriticalName)};
1999 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2000 std::end(Args));
2001 if (Hint) {
2002 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2003 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2004 }
2005 CommonActionTy Action(
2006 OMPBuilder.getOrCreateRuntimeFunction(
2007 CGM.getModule(),
2008 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2009 EnterArgs,
2010 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2011 OMPRTL___kmpc_end_critical),
2012 Args);
2013 CriticalOpGen.setAction(Action);
2014 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2015 }
2016
2017 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2018 const RegionCodeGenTy &MasterOpGen,
2019 SourceLocation Loc) {
2020 if (!CGF.HaveInsertPoint())
2021 return;
2022 // if(__kmpc_master(ident_t *, gtid)) {
2023 // MasterOpGen();
2024 // __kmpc_end_master(ident_t *, gtid);
2025 // }
2026 // Prepare arguments and build a call to __kmpc_master
2027 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2028 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2029 CGM.getModule(), OMPRTL___kmpc_master),
2030 Args,
2031 OMPBuilder.getOrCreateRuntimeFunction(
2032 CGM.getModule(), OMPRTL___kmpc_end_master),
2033 Args,
2034 /*Conditional=*/true);
2035 MasterOpGen.setAction(Action);
2036 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2037 Action.Done(CGF);
2038 }
2039
2040 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2041 const RegionCodeGenTy &MaskedOpGen,
2042 SourceLocation Loc, const Expr *Filter) {
2043 if (!CGF.HaveInsertPoint())
2044 return;
2045 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2046 // MaskedOpGen();
2047 // __kmpc_end_masked(ident_t *, gtid);
2048 // }
2049 // Prepare arguments and build a call to __kmpc_masked
2050 llvm::Value *FilterVal = Filter
2051 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2052 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2053 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2054 FilterVal};
2055 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2056 getThreadID(CGF, Loc)};
2057 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2058 CGM.getModule(), OMPRTL___kmpc_masked),
2059 Args,
2060 OMPBuilder.getOrCreateRuntimeFunction(
2061 CGM.getModule(), OMPRTL___kmpc_end_masked),
2062 ArgsEnd,
2063 /*Conditional=*/true);
2064 MaskedOpGen.setAction(Action);
2065 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2066 Action.Done(CGF);
2067 }
2068
2069 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2070 SourceLocation Loc) {
2071 if (!CGF.HaveInsertPoint())
2072 return;
2073 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2074 OMPBuilder.createTaskyield(CGF.Builder);
2075 } else {
2076 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2077 llvm::Value *Args[] = {
2078 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2079 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2080 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2081 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2082 Args);
2083 }
2084
2085 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2086 Region->emitUntiedSwitch(CGF);
2087 }
2088
2089 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2090 const RegionCodeGenTy &TaskgroupOpGen,
2091 SourceLocation Loc) {
2092 if (!CGF.HaveInsertPoint())
2093 return;
2094 // __kmpc_taskgroup(ident_t *, gtid);
2095 // TaskgroupOpGen();
2096 // __kmpc_end_taskgroup(ident_t *, gtid);
2097 // Prepare arguments and build a call to __kmpc_taskgroup
2098 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2099 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2100 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2101 Args,
2102 OMPBuilder.getOrCreateRuntimeFunction(
2103 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2104 Args);
2105 TaskgroupOpGen.setAction(Action);
2106 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2107 }
2108
2109 /// Given an array of pointers to variables, project the address of a
2110 /// given variable.
2111 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2112 unsigned Index, const VarDecl *Var) {
2113 // Pull out the pointer to the variable.
2114 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2115 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2116
2117 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2118 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2119 }
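// Equivalent C sketch of the projection above, for an element of type T:
//   void **array = ...;
//   T *var = (T *)array[Index];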
2120
2121 static llvm::Value *emitCopyprivateCopyFunction(
2122 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2123 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2124 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2125 SourceLocation Loc) {
2126 ASTContext &C = CGM.getContext();
2127 // void copy_func(void *LHSArg, void *RHSArg);
2128 FunctionArgList Args;
2129 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2130 ImplicitParamKind::Other);
2131 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132 ImplicitParamKind::Other);
2133 Args.push_back(&LHSArg);
2134 Args.push_back(&RHSArg);
2135 const auto &CGFI =
2136 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2137 std::string Name =
2138 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2139 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2140 llvm::GlobalValue::InternalLinkage, Name,
2141 &CGM.getModule());
2142 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2143 Fn->setDoesNotRecurse();
2144 CodeGenFunction CGF(CGM);
2145 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2146 // Dest = (void*[n])(LHSArg);
2147 // Src = (void*[n])(RHSArg);
2148 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2149 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2150 CGF.Builder.getPtrTy(0)),
2151 ArgsElemType, CGF.getPointerAlign());
2152 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2153 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2154 CGF.Builder.getPtrTy(0)),
2155 ArgsElemType, CGF.getPointerAlign());
2156 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2157 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2158 // ...
2159 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2160 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2161 const auto *DestVar =
2162 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2163 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2164
2165 const auto *SrcVar =
2166 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2167 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2168
2169 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2170 QualType Type = VD->getType();
2171 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2172 }
2173 CGF.FinishFunction();
2174 return Fn;
2175 }
2176
2177 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2178 const RegionCodeGenTy &SingleOpGen,
2179 SourceLocation Loc,
2180 ArrayRef<const Expr *> CopyprivateVars,
2181 ArrayRef<const Expr *> SrcExprs,
2182 ArrayRef<const Expr *> DstExprs,
2183 ArrayRef<const Expr *> AssignmentOps) {
2184 if (!CGF.HaveInsertPoint())
2185 return;
2186 assert(CopyprivateVars.size() == SrcExprs.size() &&
2187 CopyprivateVars.size() == DstExprs.size() &&
2188 CopyprivateVars.size() == AssignmentOps.size());
2189 ASTContext &C = CGM.getContext();
2190 // int32 did_it = 0;
2191 // if(__kmpc_single(ident_t *, gtid)) {
2192 // SingleOpGen();
2193 // __kmpc_end_single(ident_t *, gtid);
2194 // did_it = 1;
2195 // }
2196 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2197 // <copy_func>, did_it);
2198
2199 Address DidIt = Address::invalid();
2200 if (!CopyprivateVars.empty()) {
2201 // int32 did_it = 0;
2202 QualType KmpInt32Ty =
2203 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2204 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2205 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2206 }
2207 // Prepare arguments and build a call to __kmpc_single
2208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2209 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2210 CGM.getModule(), OMPRTL___kmpc_single),
2211 Args,
2212 OMPBuilder.getOrCreateRuntimeFunction(
2213 CGM.getModule(), OMPRTL___kmpc_end_single),
2214 Args,
2215 /*Conditional=*/true);
2216 SingleOpGen.setAction(Action);
2217 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2218 if (DidIt.isValid()) {
2219 // did_it = 1;
2220 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2221 }
2222 Action.Done(CGF);
2223 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2224 // <copy_func>, did_it);
2225 if (DidIt.isValid()) {
2226 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2227 QualType CopyprivateArrayTy = C.getConstantArrayType(
2228 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2229 /*IndexTypeQuals=*/0);
2230 // Create a list of all private variables for copyprivate.
2231 Address CopyprivateList =
2232 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2233 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2234 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2235 CGF.Builder.CreateStore(
2236 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2237 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2238 CGF.VoidPtrTy),
2239 Elem);
2240 }
2241 // Build the function that copies private values from the single region to
2242 // all other threads in the corresponding parallel region.
2243 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2244 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2245 SrcExprs, DstExprs, AssignmentOps, Loc);
2246 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2247 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2248 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2249 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2250 llvm::Value *Args[] = {
2251 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2252 getThreadID(CGF, Loc), // i32 <gtid>
2253 BufSize, // size_t <buf_size>
2254 CL.emitRawPointer(CGF), // void *<copyprivate list>
2255 CpyFn, // void (*) (void *, void *) <copy_func>
2256 DidItVal // i32 did_it
2257 };
2258 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2259 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2260 Args);
2261 }
2262 }
2263
2264 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2265 const RegionCodeGenTy &OrderedOpGen,
2266 SourceLocation Loc, bool IsThreads) {
2267 if (!CGF.HaveInsertPoint())
2268 return;
2269 // __kmpc_ordered(ident_t *, gtid);
2270 // OrderedOpGen();
2271 // __kmpc_end_ordered(ident_t *, gtid);
2272 // Prepare arguments and build a call to __kmpc_ordered
2273 if (IsThreads) {
2274 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276 CGM.getModule(), OMPRTL___kmpc_ordered),
2277 Args,
2278 OMPBuilder.getOrCreateRuntimeFunction(
2279 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2280 Args);
2281 OrderedOpGen.setAction(Action);
2282 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2283 return;
2284 }
2285 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2286 }
2287
2288 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2289 unsigned Flags;
2290 if (Kind == OMPD_for)
2291 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2292 else if (Kind == OMPD_sections)
2293 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2294 else if (Kind == OMPD_single)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2296 else if (Kind == OMPD_barrier)
2297 Flags = OMP_IDENT_BARRIER_EXPL;
2298 else
2299 Flags = OMP_IDENT_BARRIER_IMPL;
2300 return Flags;
2301 }
2302
2303 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2304 CodeGenFunction &CGF, const OMPLoopDirective &S,
2305 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2306 // Check if the loop directive is actually a doacross loop directive. In that
2307 // case, choose the 'static, 1' schedule.
2308 if (llvm::any_of(
2309 S.getClausesOfKind<OMPOrderedClause>(),
2310 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2311 ScheduleKind = OMPC_SCHEDULE_static;
2312 // Chunk size is 1 in this case.
2313 llvm::APInt ChunkSize(32, 1);
2314 ChunkExpr = IntegerLiteral::Create(
2315 CGF.getContext(), ChunkSize,
2316 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2317 SourceLocation());
2318 }
2319 }
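// Example: a doacross loop such as
//   #pragma omp for ordered(2)
// carries a loop count on its ordered clause, so the code above forces the
// equivalent of schedule(static, 1); a plain 'ordered' clause with no
// parameter leaves the default schedule untouched.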
2320
2321 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2322 OpenMPDirectiveKind Kind, bool EmitChecks,
2323 bool ForceSimpleCall) {
2324 // Check if we should use the OMPBuilder
2325 auto *OMPRegionInfo =
2326 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2327 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2328 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2329 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2330 EmitChecks));
2331 CGF.Builder.restoreIP(AfterIP);
2332 return;
2333 }
2334
2335 if (!CGF.HaveInsertPoint())
2336 return;
2337 // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
2338 // __kmpc_barrier(loc, thread_id).
2339 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2342 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2343 getThreadID(CGF, Loc)};
2344 if (OMPRegionInfo) {
2345 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2346 llvm::Value *Result = CGF.EmitRuntimeCall(
2347 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2348 OMPRTL___kmpc_cancel_barrier),
2349 Args);
2350 if (EmitChecks) {
2351 // if (__kmpc_cancel_barrier()) {
2352 // exit from construct;
2353 // }
2354 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2355 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2356 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2357 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2358 CGF.EmitBlock(ExitBB);
2359 // exit from construct;
2360 CodeGenFunction::JumpDest CancelDestination =
2361 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2362 CGF.EmitBranchThroughCleanup(CancelDestination);
2363 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2364 }
2365 return;
2366 }
2367 }
2368 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2369 CGM.getModule(), OMPRTL___kmpc_barrier),
2370 Args);
2371 }
2372
2373 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2374 Expr *ME, bool IsFatal) {
2375 llvm::Value *MVL =
2376 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2377 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2378 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2379 // *message)
2380 llvm::Value *Args[] = {
2381 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
2382 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2383 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2384 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2385 CGM.getModule(), OMPRTL___kmpc_error),
2386 Args);
2387 }
2388
2389 /// Map the OpenMP loop schedule to the runtime enumeration.
2390 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2391 bool Chunked, bool Ordered) {
2392 switch (ScheduleKind) {
2393 case OMPC_SCHEDULE_static:
2394 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2395 : (Ordered ? OMP_ord_static : OMP_sch_static);
2396 case OMPC_SCHEDULE_dynamic:
2397 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2398 case OMPC_SCHEDULE_guided:
2399 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2400 case OMPC_SCHEDULE_runtime:
2401 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2402 case OMPC_SCHEDULE_auto:
2403 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2404 case OMPC_SCHEDULE_unknown:
2405 assert(!Chunked && "chunk was specified but schedule kind not known");
2406 return Ordered ? OMP_ord_static : OMP_sch_static;
2407 }
2408 llvm_unreachable("Unexpected runtime schedule");
2409 }
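// Examples of the mapping above (without an 'ordered' clause):
//   schedule(static)       -> OMP_sch_static
//   schedule(static, 4)    -> OMP_sch_static_chunked
//   schedule(dynamic[, 4]) -> OMP_sch_dynamic_chunked
//   schedule(guided[, 4])  -> OMP_sch_guided_chunked
// With 'ordered', the corresponding OMP_ord_* values are used instead.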
2410
2411 /// Map the OpenMP distribute schedule to the runtime enumeration.
2412 static OpenMPSchedType
2413 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2414 // Only 'static' is allowed for dist_schedule.
2415 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2416 }
2417
2418 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2419 bool Chunked) const {
2420 OpenMPSchedType Schedule =
2421 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2422 return Schedule == OMP_sch_static;
2423 }
2424
2425 bool CGOpenMPRuntime::isStaticNonchunked(
2426 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2427 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2428 return Schedule == OMP_dist_sch_static;
2429 }
2430
2431 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2432 bool Chunked) const {
2433 OpenMPSchedType Schedule =
2434 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2435 return Schedule == OMP_sch_static_chunked;
2436 }
2437
2438 bool CGOpenMPRuntime::isStaticChunked(
2439 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2440 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2441 return Schedule == OMP_dist_sch_static_chunked;
2442 }
2443
2444 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2445 OpenMPSchedType Schedule =
2446 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2447 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2448 return Schedule != OMP_sch_static;
2449 }
2450
2451 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2452 OpenMPScheduleClauseModifier M1,
2453 OpenMPScheduleClauseModifier M2) {
2454 int Modifier = 0;
2455 switch (M1) {
2456 case OMPC_SCHEDULE_MODIFIER_monotonic:
2457 Modifier = OMP_sch_modifier_monotonic;
2458 break;
2459 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2460 Modifier = OMP_sch_modifier_nonmonotonic;
2461 break;
2462 case OMPC_SCHEDULE_MODIFIER_simd:
2463 if (Schedule == OMP_sch_static_chunked)
2464 Schedule = OMP_sch_static_balanced_chunked;
2465 break;
2466 case OMPC_SCHEDULE_MODIFIER_last:
2467 case OMPC_SCHEDULE_MODIFIER_unknown:
2468 break;
2469 }
2470 switch (M2) {
2471 case OMPC_SCHEDULE_MODIFIER_monotonic:
2472 Modifier = OMP_sch_modifier_monotonic;
2473 break;
2474 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2475 Modifier = OMP_sch_modifier_nonmonotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_simd:
2478 if (Schedule == OMP_sch_static_chunked)
2479 Schedule = OMP_sch_static_balanced_chunked;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_last:
2482 case OMPC_SCHEDULE_MODIFIER_unknown:
2483 break;
2484 }
2485 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2486 // If the static schedule kind is specified or if the ordered clause is
2487 // specified, and if the nonmonotonic modifier is not specified, the effect is
2488 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2489 // modifier is specified, the effect is as if the nonmonotonic modifier is
2490 // specified.
2491 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2492 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2493 Schedule == OMP_sch_static_balanced_chunked ||
2494 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2495 Schedule == OMP_dist_sch_static_chunked ||
2496 Schedule == OMP_dist_sch_static))
2497 Modifier = OMP_sch_modifier_nonmonotonic;
2498 }
2499 return Schedule | Modifier;
2500 }
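// Example of the OpenMP 5.0 default above: 'schedule(dynamic, 4)' with no
// modifier yields
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// while the static and dist_schedule kinds are left unmodified, i.e. they
// behave as if 'monotonic' were specified.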
2501
2502 void CGOpenMPRuntime::emitForDispatchInit(
2503 CodeGenFunction &CGF, SourceLocation Loc,
2504 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2505 bool Ordered, const DispatchRTInput &DispatchValues) {
2506 if (!CGF.HaveInsertPoint())
2507 return;
2508 OpenMPSchedType Schedule = getRuntimeSchedule(
2509 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2510 assert(Ordered ||
2511 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2512 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2513 Schedule != OMP_sch_static_balanced_chunked));
2514 // Call __kmpc_dispatch_init(
2515 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2516 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2517 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2518
2519 // If the Chunk was not specified in the clause, use the default value 1.
2520 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2521 : CGF.Builder.getIntN(IVSize, 1);
2522 llvm::Value *Args[] = {
2523 emitUpdateLocation(CGF, Loc),
2524 getThreadID(CGF, Loc),
2525 CGF.Builder.getInt32(addMonoNonMonoModifier(
2526 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2527 DispatchValues.LB, // Lower
2528 DispatchValues.UB, // Upper
2529 CGF.Builder.getIntN(IVSize, 1), // Stride
2530 Chunk // Chunk
2531 };
2532 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2533 Args);
2534 }
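// Hedged sketch of the dynamic-dispatch protocol this call starts (the
// matching dispatch_next loop is emitted by the loop codegen, not here), for
// a 32-bit signed IV:
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &stride)) {
//     for (iv = lb; iv <= ub; iv += stride)
//       ...loop body...
//   }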
2535
2536 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2537 SourceLocation Loc) {
2538 if (!CGF.HaveInsertPoint())
2539 return;
2540 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2541 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2542 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2543 }
2544
2545 static void emitForStaticInitCall(
2546 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2547 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2548 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2549 const CGOpenMPRuntime::StaticRTInput &Values) {
2550 if (!CGF.HaveInsertPoint())
2551 return;
2552
2553 assert(!Values.Ordered);
2554 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2555 Schedule == OMP_sch_static_balanced_chunked ||
2556 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2557 Schedule == OMP_dist_sch_static ||
2558 Schedule == OMP_dist_sch_static_chunked);
2559
2560 // Call __kmpc_for_static_init(
2561 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2562 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2563 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2564 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2565 llvm::Value *Chunk = Values.Chunk;
2566 if (Chunk == nullptr) {
2567 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2568 Schedule == OMP_dist_sch_static) &&
2569 "expected static non-chunked schedule");
2570 // If the Chunk was not specified in the clause - use default value 1.
2571 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2572 } else {
2573 assert((Schedule == OMP_sch_static_chunked ||
2574 Schedule == OMP_sch_static_balanced_chunked ||
2575 Schedule == OMP_ord_static_chunked ||
2576 Schedule == OMP_dist_sch_static_chunked) &&
2577 "expected static chunked schedule");
2578 }
2579 llvm::Value *Args[] = {
2580 UpdateLocation,
2581 ThreadId,
2582 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2583 M2)), // Schedule type
2584 Values.IL.emitRawPointer(CGF), // &isLastIter
2585 Values.LB.emitRawPointer(CGF), // &LB
2586 Values.UB.emitRawPointer(CGF), // &UB
2587 Values.ST.emitRawPointer(CGF), // &Stride
2588 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2589 Chunk // Chunk
2590 };
2591 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2592 }
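// A minimal sketch of the resulting call for schedule(static) with a 32-bit
// signed induction variable:
//   kmp_int32 last, lb, ub, st;
//   __kmpc_for_static_init_4(&loc, tid, kmp_sch_static, &last, &lb, &ub,
//                            &st, /*incr=*/1, /*chunk=*/1);
// after which the thread iterates its [lb, ub] slice directly.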

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
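// Taken together, the static-schedule helpers above bracket the loop body:
//   __kmpc_for_static_init_*(...);  // clamp [lb, ub] to this thread's chunk
//   for (iv = lb; iv <= ub; iv += incr) { body(iv); }
//   __kmpc_for_static_fini(&loc, tid);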

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //     kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //     kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
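// The boolean returned above drives the dispatch loop; roughly:
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (iv = lb; iv <= ub; iv += st) body(iv);
//   }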

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
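// For example, "#pragma omp parallel num_threads(n)" emits
//   __kmpc_push_num_threads(&loc, tid, (kmp_int32)n);
// just before the runtime call that forks the parallel region.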

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
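// For example, a bare "#pragma omp flush" lowers to the single call
//   __kmpc_flush(&loc);
// which the runtime treats as a full memory fence.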

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
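// For example, given
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// isAllocatableDecl(x) returns true, and the privates record built below
// then holds a pointer to runtime-allocated storage for x instead of x
// itself.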

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
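// As an illustration, for a task with firstprivate(a) private(b) (both
// ints) the record built here is roughly
//   struct .kmp_privates.t { int a; int b; };
// with the field order following the alignment-sorted Privates list
// prepared by the caller (see emitTaskInit below).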

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t      task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

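/// Emit a destructor helper for the privates of a task; the runtime invokes
/// it (through the data1 compiler data of kmp_task_t) when the task was
/// allocated with the destructors flag set. A rough sketch of the generated
/// code, with compiler-internal names:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   // Run the destructor of every non-trivially destructible field
///   // of tt->privates.
/// }
/// \endcode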
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if the initialization of private copies requires a task duplication
/// function for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
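// For illustration, for a clause such as
//   depend(iterator(i = 0:n), in: a[i])
// this scope emits roughly
//   counter = 0;
// cont:
//   if (!(counter < n)) goto exit;
//   i = begin + counter * step;         // body: one entry is filled for a[i]
//   counter = counter + 1; goto cont;   // emitted by the destructor
// exit:
// with one such loop nest per iterator.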

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
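// For example (a sketch): given double a[100], the section a[10:20] yields
// the pair (&a[10], 20 * sizeof(double)); the size is computed as the
// distance from the lower bound to one element past the section's end.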

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
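// The record built above mirrors (roughly) the runtime's
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };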
3588
3589 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)3590 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3591 const OMPExecutableDirective &D,
3592 llvm::Function *TaskFunction, QualType SharedsTy,
3593 Address Shareds, const OMPTaskDataTy &Data) {
3594 ASTContext &C = CGM.getContext();
3595 llvm::SmallVector<PrivateDataTy, 4> Privates;
3596 // Aggregate privates and sort them by the alignment.
3597 const auto *I = Data.PrivateCopies.begin();
3598 for (const Expr *E : Data.PrivateVars) {
3599 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3600 Privates.emplace_back(
3601 C.getDeclAlign(VD),
3602 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3603 /*PrivateElemInit=*/nullptr));
3604 ++I;
3605 }
3606 I = Data.FirstprivateCopies.begin();
3607 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3608 for (const Expr *E : Data.FirstprivateVars) {
3609 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3610 Privates.emplace_back(
3611 C.getDeclAlign(VD),
3612 PrivateHelpersTy(
3613 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3614 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3615 ++I;
3616 ++IElemInitRef;
3617 }
3618 I = Data.LastprivateCopies.begin();
3619 for (const Expr *E : Data.LastprivateVars) {
3620 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3621 Privates.emplace_back(
3622 C.getDeclAlign(VD),
3623 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3624 /*PrivateElemInit=*/nullptr));
3625 ++I;
3626 }
3627 for (const VarDecl *VD : Data.PrivateLocals) {
3628 if (isAllocatableDecl(VD))
3629 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3630 else
3631 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3632 }
3633 llvm::stable_sort(Privates,
3634 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3635 return L.first > R.first;
3636 });
3637 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3638 // Build type kmp_routine_entry_t (if not built yet).
3639 emitKmpRoutineEntryT(KmpInt32Ty);
3640 // Build type kmp_task_t (if not built yet).
3641 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3642 if (SavedKmpTaskloopTQTy.isNull()) {
3643 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3644 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3645 }
3646 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3647 } else {
3648 assert((D.getDirectiveKind() == OMPD_task ||
3649 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3650 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3651 "Expected taskloop, task or target directive");
3652 if (SavedKmpTaskTQTy.isNull()) {
3653 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3654 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3655 }
3656 KmpTaskTQTy = SavedKmpTaskTQTy;
3657 }
3658 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3659 // Build particular struct kmp_task_t for the given task.
3660 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3661 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3662 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3663 QualType KmpTaskTWithPrivatesPtrQTy =
3664 C.getPointerType(KmpTaskTWithPrivatesQTy);
3665 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3666 llvm::Value *KmpTaskTWithPrivatesTySize =
3667 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3668 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3669
3670 // Emit initial values for private copies (if any).
3671 llvm::Value *TaskPrivatesMap = nullptr;
3672 llvm::Type *TaskPrivatesMapTy =
3673 std::next(TaskFunction->arg_begin(), 3)->getType();
3674 if (!Privates.empty()) {
3675 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3676 TaskPrivatesMap =
3677 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3678 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3679 TaskPrivatesMap, TaskPrivatesMapTy);
3680 } else {
3681 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3682 cast<llvm::PointerType>(TaskPrivatesMapTy));
3683 }
3684 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3685 // kmp_task_t *tt);
3686 llvm::Function *TaskEntry = emitProxyTaskFunction(
3687 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3688 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3689 TaskPrivatesMap);
3690
3691 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3692 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3693 // kmp_routine_entry_t *task_entry);
3694 // Task flags. Format is taken from
3695 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3696 // description of kmp_tasking_flags struct.
3697 enum {
3698 TiedFlag = 0x1,
3699 FinalFlag = 0x2,
3700 DestructorsFlag = 0x8,
3701 PriorityFlag = 0x20,
3702 DetachableFlag = 0x40,
3703 };
3704 unsigned Flags = Data.Tied ? TiedFlag : 0;
3705 bool NeedsCleanup = false;
3706 if (!Privates.empty()) {
3707 NeedsCleanup =
3708 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3709 if (NeedsCleanup)
3710 Flags = Flags | DestructorsFlag;
3711 }
3712 if (Data.Priority.getInt())
3713 Flags = Flags | PriorityFlag;
3714 if (D.hasClausesOfKind<OMPDetachClause>())
3715 Flags = Flags | DetachableFlag;
3716 llvm::Value *TaskFlags =
3717 Data.Final.getPointer()
3718 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3719 CGF.Builder.getInt32(FinalFlag),
3720 CGF.Builder.getInt32(/*C=*/0))
3721 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3722 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3723 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3724 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3725 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3726 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3727 TaskEntry, KmpRoutineEntryPtrTy)};
3728 llvm::Value *NewTask;
3729 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3730 // Check if we have any device clause associated with the directive.
3731 const Expr *Device = nullptr;
3732 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3733 Device = C->getDevice();
3734 // Emit device ID if any otherwise use default value.
3735 llvm::Value *DeviceID;
3736 if (Device)
3737 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3738 CGF.Int64Ty, /*isSigned=*/true);
3739 else
3740 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3741 AllocArgs.push_back(DeviceID);
3742 NewTask = CGF.EmitRuntimeCall(
3743 OMPBuilder.getOrCreateRuntimeFunction(
3744 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3745 AllocArgs);
3746 } else {
3747 NewTask =
3748 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3749 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3750 AllocArgs);
3751 }
3752 // Emit detach clause initialization.
3753 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3754 // task_descriptor);
3755 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3756 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3757 LValue EvtLVal = CGF.EmitLValue(Evt);
3758
3759 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3760 // int gtid, kmp_task_t *task);
3761 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3762 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3763 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3764 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3765 OMPBuilder.getOrCreateRuntimeFunction(
3766 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3767 {Loc, Tid, NewTask});
3768 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3769 Evt->getExprLoc());
3770 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3771 }
3772 // Process affinity clauses.
3773 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3774 // Process list of affinity data.
3775 ASTContext &C = CGM.getContext();
3776 Address AffinitiesArray = Address::invalid();
3777 // Calculate number of elements to form the array of affinity data.
3778 llvm::Value *NumOfElements = nullptr;
3779 unsigned NumAffinities = 0;
3780 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3781 if (const Expr *Modifier = C->getModifier()) {
3782 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3783 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3784 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3785 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3786 NumOfElements =
3787 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3788 }
3789 } else {
3790 NumAffinities += C->varlist_size();
3791 }
3792 }
3793 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3794 // Fields ids in kmp_task_affinity_info record.
3795 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3796
3797 QualType KmpTaskAffinityInfoArrayTy;
3798 if (NumOfElements) {
3799 NumOfElements = CGF.Builder.CreateNUWAdd(
3800 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3801 auto *OVE = new (C) OpaqueValueExpr(
3802 Loc,
3803 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3804 VK_PRValue);
3805 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3806 RValue::get(NumOfElements));
3807 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3808 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3809 /*IndexTypeQuals=*/0);
3810 // Properly emit variable-sized array.
3811 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3812 ImplicitParamKind::Other);
3813 CGF.EmitVarDecl(*PD);
3814 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3815 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3816 /*isSigned=*/false);
3817 } else {
3818 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3819 KmpTaskAffinityInfoTy,
3820 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3821 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3822 AffinitiesArray =
3823 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3824 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3825 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3826 /*isSigned=*/false);
3827 }
3828
3829 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3830 // Fill the array with elements that have no iterator modifier.
3831 unsigned Pos = 0;
3832 bool HasIterator = false;
3833 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3834 if (C->getModifier()) {
3835 HasIterator = true;
3836 continue;
3837 }
3838 for (const Expr *E : C->varlist()) {
3839 llvm::Value *Addr;
3840 llvm::Value *Size;
3841 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3842 LValue Base =
3843 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3844 KmpTaskAffinityInfoTy);
3845 // affs[i].base_addr = &<Affinities[i].second>;
3846 LValue BaseAddrLVal = CGF.EmitLValueForField(
3847 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3848 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3849 BaseAddrLVal);
3850 // affs[i].len = sizeof(<Affinities[i].second>);
3851 LValue LenLVal = CGF.EmitLValueForField(
3852 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3853 CGF.EmitStoreOfScalar(Size, LenLVal);
3854 ++Pos;
3855 }
3856 }
3857 LValue PosLVal;
3858 if (HasIterator) {
3859 PosLVal = CGF.MakeAddrLValue(
3860 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3861 C.getSizeType());
3862 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3863 }
3864 // Process elements with iterators.
3865 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3866 const Expr *Modifier = C->getModifier();
3867 if (!Modifier)
3868 continue;
3869 OMPIteratorGeneratorScope IteratorScope(
3870 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3871 for (const Expr *E : C->varlist()) {
3872 llvm::Value *Addr;
3873 llvm::Value *Size;
3874 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3875 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3876 LValue Base =
3877 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3878 KmpTaskAffinityInfoTy);
3879 // affs[i].base_addr = &<Affinities[i].second>;
3880 LValue BaseAddrLVal = CGF.EmitLValueForField(
3881 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3882 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3883 BaseAddrLVal);
3884 // affs[i].len = sizeof(<Affinities[i].second>);
3885 LValue LenLVal = CGF.EmitLValueForField(
3886 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3887 CGF.EmitStoreOfScalar(Size, LenLVal);
3888 Idx = CGF.Builder.CreateNUWAdd(
3889 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3890 CGF.EmitStoreOfScalar(Idx, PosLVal);
3891 }
3892 }
3893 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3894 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3895 // naffins, kmp_task_affinity_info_t *affin_list);
3896 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3897 llvm::Value *GTid = getThreadID(CGF, Loc);
3898 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3899 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3900 // FIXME: Emit the call and ignore its result for now, until the
3901 // runtime function is properly implemented.
3902 (void)CGF.EmitRuntimeCall(
3903 OMPBuilder.getOrCreateRuntimeFunction(
3904 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3905 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3906 }
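  // Roughly, for '#pragma omp task affinity(iterator(i=0:n): p[i])' the code
  // emitted above corresponds to this sketch (names are illustrative only):
  //   kmp_task_affinity_info_t affs[<num>];
  //   size_t pos = 0; // materialized in memory only for iterator modifiers
  //   for (size_t i = 0; i < n; ++i) {
  //     affs[pos].base_addr = (intptr_t)&p[i];
  //     affs[pos].len = sizeof(p[i]);
  //     ++pos;
  //   }
  //   __kmpc_omp_reg_task_with_affinity(loc, gtid, new_task, <num>, affs);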
3907 llvm::Value *NewTaskNewTaskTTy =
3908 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3909 NewTask, KmpTaskTWithPrivatesPtrTy);
3910 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3911 KmpTaskTWithPrivatesQTy);
3912 LValue TDBase =
3913 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3914 // Fill the data in the resulting kmp_task_t record.
3915 // Copy shareds if there are any.
3916 Address KmpTaskSharedsPtr = Address::invalid();
3917 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3918 KmpTaskSharedsPtr = Address(
3919 CGF.EmitLoadOfScalar(
3920 CGF.EmitLValueForField(
3921 TDBase,
3922 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3923 Loc),
3924 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3925 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3926 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3927 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3928 }
3929 // Emit initial values for private copies (if any).
3930 TaskResultTy Result;
3931 if (!Privates.empty()) {
3932 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3933 SharedsTy, SharedsPtrTy, Data, Privates,
3934 /*ForDup=*/false);
3935 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3936 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3937 Result.TaskDupFn = emitTaskDupFunction(
3938 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3939 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3940 /*WithLastIter=*/!Data.LastprivateVars.empty());
3941 }
3942 }
3943 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3944 enum { Priority = 0, Destructors = 1 };
3945 // Provide pointer to function with destructors for privates.
3946 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3947 const RecordDecl *KmpCmplrdataUD =
3948 (*FI)->getType()->getAsUnionType()->getDecl();
3949 if (NeedsCleanup) {
3950 llvm::Value *DestructorFn = emitDestructorsFunction(
3951 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3952 KmpTaskTWithPrivatesQTy);
3953 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3954 LValue DestructorsLV = CGF.EmitLValueForField(
3955 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3956 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3957 DestructorFn, KmpRoutineEntryPtrTy),
3958 DestructorsLV);
3959 }
3960 // Set priority.
3961 if (Data.Priority.getInt()) {
3962 LValue Data2LV = CGF.EmitLValueForField(
3963 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3964 LValue PriorityLV = CGF.EmitLValueForField(
3965 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3966 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3967 }
3968 Result.NewTask = NewTask;
3969 Result.TaskEntry = TaskEntry;
3970 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3971 Result.TDBase = TDBase;
3972 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3973 return Result;
3974 }
3975
3976 /// Translates internal dependency kind into the runtime kind.
3977 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3978 RTLDependenceKindTy DepKind;
3979 switch (K) {
3980 case OMPC_DEPEND_in:
3981 DepKind = RTLDependenceKindTy::DepIn;
3982 break;
3983 // Out and InOut dependencies must use the same code.
3984 case OMPC_DEPEND_out:
3985 case OMPC_DEPEND_inout:
3986 DepKind = RTLDependenceKindTy::DepInOut;
3987 break;
3988 case OMPC_DEPEND_mutexinoutset:
3989 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3990 break;
3991 case OMPC_DEPEND_inoutset:
3992 DepKind = RTLDependenceKindTy::DepInOutSet;
3993 break;
3994 case OMPC_DEPEND_outallmemory:
3995 DepKind = RTLDependenceKindTy::DepOmpAllMem;
3996 break;
3997 case OMPC_DEPEND_source:
3998 case OMPC_DEPEND_sink:
3999 case OMPC_DEPEND_depobj:
4000 case OMPC_DEPEND_inoutallmemory:
4001 case OMPC_DEPEND_unknown:
4002 llvm_unreachable("Unknown task dependence type");
4003 }
4004 return DepKind;
4005 }
4006
4007 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4008 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4009 QualType &FlagsTy) {
4010 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4011 if (KmpDependInfoTy.isNull()) {
4012 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4013 KmpDependInfoRD->startDefinition();
4014 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4015 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4016 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4017 KmpDependInfoRD->completeDefinition();
4018 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4019 }
4020 }
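// The implicit record built above is expected to mirror the runtime's
// kmp_depend_info; a C sketch (field widths follow the ASTContext types
// chosen above, so flags has the width of bool):
//   struct kmp_depend_info {
//     intptr_t base_addr;  // address of the dependency object
//     size_t len;          // its size in bytes
//     unsigned char flags; // RTLDependenceKindTy encoding
//   };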
4021
4022 std::pair<llvm::Value *, LValue>
4023 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4024 SourceLocation Loc) {
4025 ASTContext &C = CGM.getContext();
4026 QualType FlagsTy;
4027 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4028 RecordDecl *KmpDependInfoRD =
4029 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4030 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4031 LValue Base = CGF.EmitLoadOfPointerLValue(
4032 DepobjLVal.getAddress().withElementType(
4033 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4034 KmpDependInfoPtrTy->castAs<PointerType>());
4035 Address DepObjAddr = CGF.Builder.CreateGEP(
4036 CGF, Base.getAddress(),
4037 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4038 LValue NumDepsBase = CGF.MakeAddrLValue(
4039 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4040 // NumDeps = deps[-1].base_addr;
4041 LValue BaseAddrLVal = CGF.EmitLValueForField(
4042 NumDepsBase,
4043 *std::next(KmpDependInfoRD->field_begin(),
4044 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4045 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4046 return std::make_pair(NumDeps, Base);
4047 }
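// Layout assumed here (produced by emitDepobjDependClause below): a depobj
// handle points one element past a header entry, and the header stores the
// element count in its base_addr field, which is what the load above reads.
//   [ header(count) | dep0 | dep1 | ... ]
//                     ^-- depobj handle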
4048
4049 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4050 llvm::PointerUnion<unsigned *, LValue *> Pos,
4051 const OMPTaskDataTy::DependData &Data,
4052 Address DependenciesArray) {
4053 CodeGenModule &CGM = CGF.CGM;
4054 ASTContext &C = CGM.getContext();
4055 QualType FlagsTy;
4056 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4057 RecordDecl *KmpDependInfoRD =
4058 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4059 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4060
4061 OMPIteratorGeneratorScope IteratorScope(
4062 CGF, cast_or_null<OMPIteratorExpr>(
4063 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4064 : nullptr));
4065 for (const Expr *E : Data.DepExprs) {
4066 llvm::Value *Addr;
4067 llvm::Value *Size;
4068
4069 // The expression will be a nullptr in the 'omp_all_memory' case.
4070 if (E) {
4071 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4072 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4073 } else {
4074 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4075 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4076 }
4077 LValue Base;
4078 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4079 Base = CGF.MakeAddrLValue(
4080 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4081 } else {
4082 assert(E && "Expected a non-null expression");
4083 LValue &PosLVal = *cast<LValue *>(Pos);
4084 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4085 Base = CGF.MakeAddrLValue(
4086 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4087 }
4088 // deps[i].base_addr = &<Dependencies[i].second>;
4089 LValue BaseAddrLVal = CGF.EmitLValueForField(
4090 Base,
4091 *std::next(KmpDependInfoRD->field_begin(),
4092 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4093 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4094 // deps[i].len = sizeof(<Dependencies[i].second>);
4095 LValue LenLVal = CGF.EmitLValueForField(
4096 Base, *std::next(KmpDependInfoRD->field_begin(),
4097 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4098 CGF.EmitStoreOfScalar(Size, LenLVal);
4099 // deps[i].flags = <Dependencies[i].first>;
4100 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4101 LValue FlagsLVal = CGF.EmitLValueForField(
4102 Base,
4103 *std::next(KmpDependInfoRD->field_begin(),
4104 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4105 CGF.EmitStoreOfScalar(
4106 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4107 FlagsLVal);
4108 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4109 ++(*P);
4110 } else {
4111 LValue &PosLVal = *cast<LValue *>(Pos);
4112 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4113 Idx = CGF.Builder.CreateNUWAdd(Idx,
4114 llvm::ConstantInt::get(Idx->getType(), 1));
4115 CGF.EmitStoreOfScalar(Idx, PosLVal);
4116 }
4117 }
4118 }
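// E.g. for 'depend(iterator(i=0:n), in: a[i])' the loop above emits code
// along these lines (sketch, illustrative names):
//   for (size_t i = 0; i < n; ++i) {
//     deps[pos].base_addr = (intptr_t)&a[i];
//     deps[pos].len = sizeof(a[i]);
//     deps[pos].flags = DepIn;
//     ++pos; // pos lives in memory when an iterator is present
//   }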
4119
4120 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4121 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4122 const OMPTaskDataTy::DependData &Data) {
4123 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4124 "Expected depobj dependency kind.");
4125 SmallVector<llvm::Value *, 4> Sizes;
4126 SmallVector<LValue, 4> SizeLVals;
4127 ASTContext &C = CGF.getContext();
4128 {
4129 OMPIteratorGeneratorScope IteratorScope(
4130 CGF, cast_or_null<OMPIteratorExpr>(
4131 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4132 : nullptr));
4133 for (const Expr *E : Data.DepExprs) {
4134 llvm::Value *NumDeps;
4135 LValue Base;
4136 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4137 std::tie(NumDeps, Base) =
4138 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4139 LValue NumLVal = CGF.MakeAddrLValue(
4140 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4141 C.getUIntPtrType());
4142 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4143 NumLVal.getAddress());
4144 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4145 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4146 CGF.EmitStoreOfScalar(Add, NumLVal);
4147 SizeLVals.push_back(NumLVal);
4148 }
4149 }
4150 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4151 llvm::Value *Size =
4152 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4153 Sizes.push_back(Size);
4154 }
4155 return Sizes;
4156 }
4157
4158 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4159 QualType &KmpDependInfoTy,
4160 LValue PosLVal,
4161 const OMPTaskDataTy::DependData &Data,
4162 Address DependenciesArray) {
4163 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4164 "Expected depobj dependency kind.");
4165 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4166 {
4167 OMPIteratorGeneratorScope IteratorScope(
4168 CGF, cast_or_null<OMPIteratorExpr>(
4169 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4170 : nullptr));
4171 for (const Expr *E : Data.DepExprs) {
4172 llvm::Value *NumDeps;
4173 LValue Base;
4174 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4175 std::tie(NumDeps, Base) =
4176 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4177
4178 // memcpy the dependency data.
4179 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4180 ElSize,
4181 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4182 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4183 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4184 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4185
4186 // Advance the position by the number of copied elements.
4187 // pos += numDeps;
4188 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4189 CGF.EmitStoreOfScalar(Add, PosLVal);
4190 }
4191 }
4192 }
4193
4194 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4195 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4196 SourceLocation Loc) {
4197 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4198 return D.DepExprs.empty();
4199 }))
4200 return std::make_pair(nullptr, Address::invalid());
4201 // Process list of dependencies.
4202 ASTContext &C = CGM.getContext();
4203 Address DependenciesArray = Address::invalid();
4204 llvm::Value *NumOfElements = nullptr;
4205 unsigned NumDependencies = std::accumulate(
4206 Dependencies.begin(), Dependencies.end(), 0,
4207 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4208 return D.DepKind == OMPC_DEPEND_depobj
4209 ? V
4210 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4211 });
4212 QualType FlagsTy;
4213 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4214 bool HasDepobjDeps = false;
4215 bool HasRegularWithIterators = false;
4216 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4217 llvm::Value *NumOfRegularWithIterators =
4218 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4219 // Calculate the number of depobj dependencies and regular deps with
4220 // iterators.
4221 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4222 if (D.DepKind == OMPC_DEPEND_depobj) {
4223 SmallVector<llvm::Value *, 4> Sizes =
4224 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4225 for (llvm::Value *Size : Sizes) {
4226 NumOfDepobjElements =
4227 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4228 }
4229 HasDepobjDeps = true;
4230 continue;
4231 }
4232 // Include the number of iterations, if any.
4234 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4235 llvm::Value *ClauseIteratorSpace =
4236 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4237 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4238 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4239 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4240 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4241 }
4242 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4243 ClauseIteratorSpace,
4244 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4245 NumOfRegularWithIterators =
4246 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4247 HasRegularWithIterators = true;
4248 continue;
4249 }
4250 }
4251
4252 QualType KmpDependInfoArrayTy;
4253 if (HasDepobjDeps || HasRegularWithIterators) {
4254 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4255 /*isSigned=*/false);
4256 if (HasDepobjDeps) {
4257 NumOfElements =
4258 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4259 }
4260 if (HasRegularWithIterators) {
4261 NumOfElements =
4262 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4263 }
4264 auto *OVE = new (C) OpaqueValueExpr(
4265 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4266 VK_PRValue);
4267 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4268 RValue::get(NumOfElements));
4269 KmpDependInfoArrayTy =
4270 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4271 /*IndexTypeQuals=*/0);
4272 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4273 // Properly emit variable-sized array.
4274 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4275 ImplicitParamKind::Other);
4276 CGF.EmitVarDecl(*PD);
4277 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4278 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4279 /*isSigned=*/false);
4280 } else {
4281 KmpDependInfoArrayTy = C.getConstantArrayType(
4282 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4283 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4284 DependenciesArray =
4285 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4286 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4287 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4288 /*isSigned=*/false);
4289 }
4290 unsigned Pos = 0;
4291 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4292 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4293 continue;
4294 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4295 }
4296 // Copy regular dependencies with iterators.
4297 LValue PosLVal = CGF.MakeAddrLValue(
4298 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4299 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4300 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4301 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4302 continue;
4303 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4304 }
4305 // Copy final depobj arrays without iterators.
4306 if (HasDepobjDeps) {
4307 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4308 if (Dep.DepKind != OMPC_DEPEND_depobj)
4309 continue;
4310 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4311 }
4312 }
4313 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4314 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4315 return std::make_pair(NumOfElements, DependenciesArray);
4316 }
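// Sketch of the element count computed above:
//   total = <regular deps without iterators>
//         + sum over iterator clauses of (iteration-space size * list size)
//         + sum over depobj clauses of their recorded element counts.
// Only the first term is a compile-time constant; the other two force the
// dependency array to be emitted as a VLA.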
4317
4318 Address CGOpenMPRuntime::emitDepobjDependClause(
4319 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4320 SourceLocation Loc) {
4321 if (Dependencies.DepExprs.empty())
4322 return Address::invalid();
4323 // Process list of dependencies.
4324 ASTContext &C = CGM.getContext();
4325 Address DependenciesArray = Address::invalid();
4326 unsigned NumDependencies = Dependencies.DepExprs.size();
4327 QualType FlagsTy;
4328 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4329 RecordDecl *KmpDependInfoRD =
4330 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4331
4332 llvm::Value *Size;
4333 // Define type kmp_depend_info[<Dependencies.size()>];
4334 // For depobj, reserve one extra element to store the number of elements.
4335 // It is required to handle the depobj(x) update(in) construct.
4336 // kmp_depend_info[<Dependencies.size()>] deps;
4337 llvm::Value *NumDepsVal;
4338 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4339 if (const auto *IE =
4340 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4341 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4342 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4343 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4344 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4345 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4346 }
4347 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4348 NumDepsVal);
4349 CharUnits SizeInBytes =
4350 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4351 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4352 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4353 NumDepsVal =
4354 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4355 } else {
4356 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4357 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4358 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4359 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4360 Size = CGM.getSize(Sz.alignTo(Align));
4361 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4362 }
4363 // Need to allocate the array in dynamic memory.
4364 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4365 // Use default allocator.
4366 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4367 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4368
4369 llvm::Value *Addr =
4370 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4371 CGM.getModule(), OMPRTL___kmpc_alloc),
4372 Args, ".dep.arr.addr");
4373 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4374 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4375 Addr, CGF.Builder.getPtrTy(0));
4376 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4377 // Write the number of elements into the first element of the array for depobj.
4378 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4379 // deps[0].base_addr = NumDependencies;
4380 LValue BaseAddrLVal = CGF.EmitLValueForField(
4381 Base,
4382 *std::next(KmpDependInfoRD->field_begin(),
4383 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4384 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4385 llvm::PointerUnion<unsigned *, LValue *> Pos;
4386 unsigned Idx = 1;
4387 LValue PosLVal;
4388 if (Dependencies.IteratorExpr) {
4389 PosLVal = CGF.MakeAddrLValue(
4390 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4391 C.getSizeType());
4392 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4393 /*IsInit=*/true);
4394 Pos = &PosLVal;
4395 } else {
4396 Pos = &Idx;
4397 }
4398 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4399 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4400 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4401 CGF.Int8Ty);
4402 return DependenciesArray;
4403 }
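// Roughly, for 'omp depobj(o) depend(in: x)' the code above amounts to this
// sketch (runtime calls paraphrased):
//   deps = __kmpc_alloc(gtid, (1 + ndeps) * sizeof(kmp_depend_info),
//                       /*allocator=*/nullptr);
//   deps[0].base_addr = ndeps; // header element caches the count
//   <fill deps[1 .. ndeps]>
//   o = &deps[1];              // the handle points past the header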
4404
4405 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4406 SourceLocation Loc) {
4407 ASTContext &C = CGM.getContext();
4408 QualType FlagsTy;
4409 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4410 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4411 C.VoidPtrTy.castAs<PointerType>());
4412 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4413 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4414 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4415 CGF.ConvertTypeForMem(KmpDependInfoTy));
4416 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4417 Addr.getElementType(), Addr.emitRawPointer(CGF),
4418 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4419 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4420 CGF.VoidPtrTy);
4421 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4422 // Use default allocator.
4423 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4424 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4425
4426 // __kmpc_free(gtid, addr, nullptr);
4427 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4428 CGM.getModule(), OMPRTL___kmpc_free),
4429 Args);
4430 }
4431
4432 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4433 OpenMPDependClauseKind NewDepKind,
4434 SourceLocation Loc) {
4435 ASTContext &C = CGM.getContext();
4436 QualType FlagsTy;
4437 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4438 RecordDecl *KmpDependInfoRD =
4439 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4440 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4441 llvm::Value *NumDeps;
4442 LValue Base;
4443 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4444
4445 Address Begin = Base.getAddress();
4446 // Compute the pointer past the end of the dependency array.
4447 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4448 Begin.emitRawPointer(CGF), NumDeps);
4449 // The basic structure here is a do-while loop.
4450 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4451 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4452 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4453 CGF.EmitBlock(BodyBB);
4454 llvm::PHINode *ElementPHI =
4455 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4456 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4457 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4458 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4459 Base.getTBAAInfo());
4460 // deps[i].flags = NewDepKind;
4461 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4462 LValue FlagsLVal = CGF.EmitLValueForField(
4463 Base, *std::next(KmpDependInfoRD->field_begin(),
4464 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4465 CGF.EmitStoreOfScalar(
4466 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4467 FlagsLVal);
4468
4469 // Shift the address forward by one element.
4470 llvm::Value *ElementNext =
4471 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4472 .emitRawPointer(CGF);
4473 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4474 llvm::Value *IsEmpty =
4475 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4476 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4477 // Done.
4478 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4479 }
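// The loop above corresponds to this sketch:
//   kmp_depend_info *el = &deps[0], *end = &deps[ndeps];
//   do { el->flags = <new kind>; ++el; } while (el != end);
// The body runs at least once; a depobj built by emitDepobjDependClause
// always holds at least one dependency entry.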
4480
4481 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4482 const OMPExecutableDirective &D,
4483 llvm::Function *TaskFunction,
4484 QualType SharedsTy, Address Shareds,
4485 const Expr *IfCond,
4486 const OMPTaskDataTy &Data) {
4487 if (!CGF.HaveInsertPoint())
4488 return;
4489
4490 TaskResultTy Result =
4491 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4492 llvm::Value *NewTask = Result.NewTask;
4493 llvm::Function *TaskEntry = Result.TaskEntry;
4494 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4495 LValue TDBase = Result.TDBase;
4496 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4497 // Process list of dependences.
4498 Address DependenciesArray = Address::invalid();
4499 llvm::Value *NumOfElements;
4500 std::tie(NumOfElements, DependenciesArray) =
4501 emitDependClause(CGF, Data.Dependences, Loc);
4502
4503 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4504 // libcall.
4505 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4506 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4507 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4508 // list is not empty
4509 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4510 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4511 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4512 llvm::Value *DepTaskArgs[7];
4513 if (!Data.Dependences.empty()) {
4514 DepTaskArgs[0] = UpLoc;
4515 DepTaskArgs[1] = ThreadID;
4516 DepTaskArgs[2] = NewTask;
4517 DepTaskArgs[3] = NumOfElements;
4518 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4519 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4520 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4521 }
4522 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4523 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4524 if (!Data.Tied) {
4525 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4526 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4527 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4528 }
4529 if (!Data.Dependences.empty()) {
4530 CGF.EmitRuntimeCall(
4531 OMPBuilder.getOrCreateRuntimeFunction(
4532 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4533 DepTaskArgs);
4534 } else {
4535 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4536 CGM.getModule(), OMPRTL___kmpc_omp_task),
4537 TaskArgs);
4538 }
4539 // Check if the parent region is untied and build the return for the untied task.
4540 if (auto *Region =
4541 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4542 Region->emitUntiedSwitch(CGF);
4543 };
4544
4545 llvm::Value *DepWaitTaskArgs[7];
4546 if (!Data.Dependences.empty()) {
4547 DepWaitTaskArgs[0] = UpLoc;
4548 DepWaitTaskArgs[1] = ThreadID;
4549 DepWaitTaskArgs[2] = NumOfElements;
4550 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4551 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4552 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4553 DepWaitTaskArgs[6] =
4554 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4555 }
4556 auto &M = CGM.getModule();
4557 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4558 TaskEntry, &Data, &DepWaitTaskArgs,
4559 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4560 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4561 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4562 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4563 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4564 // is specified.
4565 if (!Data.Dependences.empty())
4566 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4567 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4568 DepWaitTaskArgs);
4569 // Call proxy_task_entry(gtid, new_task);
4570 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4571 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4572 Action.Enter(CGF);
4573 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4574 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4575 OutlinedFnArgs);
4576 };
4577
4578 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4579 // kmp_task_t *new_task);
4580 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4581 // kmp_task_t *new_task);
4582 RegionCodeGenTy RCG(CodeGen);
4583 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4584 M, OMPRTL___kmpc_omp_task_begin_if0),
4585 TaskArgs,
4586 OMPBuilder.getOrCreateRuntimeFunction(
4587 M, OMPRTL___kmpc_omp_task_complete_if0),
4588 TaskArgs);
4589 RCG.setAction(Action);
4590 RCG(CGF);
4591 };
4592
4593 if (IfCond) {
4594 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4595 } else {
4596 RegionCodeGenTy ThenRCG(ThenCodeGen);
4597 ThenRCG(CGF);
4598 }
4599 }
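// Overall shape of the code emitted for 'task if(<cond>)' (sketch):
//   if (<cond>) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps,
//                               /*ndeps_noalias=*/0, /*noalias_list=*/null);
//     // or __kmpc_omp_task(loc, gtid, new_task) without dependences
//   } else {
//     __kmpc_omp_taskwait_deps_51(...); // only if dependences are present
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task); // undeferred execution
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }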
4600
4601 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4602 const OMPLoopDirective &D,
4603 llvm::Function *TaskFunction,
4604 QualType SharedsTy, Address Shareds,
4605 const Expr *IfCond,
4606 const OMPTaskDataTy &Data) {
4607 if (!CGF.HaveInsertPoint())
4608 return;
4609 TaskResultTy Result =
4610 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4611 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4612 // libcall.
4613 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4614 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4615 // sched, kmp_uint64 grainsize, void *task_dup);
4616 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4617 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4618 llvm::Value *IfVal;
4619 if (IfCond) {
4620 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4621 /*isSigned=*/true);
4622 } else {
4623 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4624 }
4625
4626 LValue LBLVal = CGF.EmitLValueForField(
4627 Result.TDBase,
4628 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4629 const auto *LBVar =
4630 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4631 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4632 /*IsInitializer=*/true);
4633 LValue UBLVal = CGF.EmitLValueForField(
4634 Result.TDBase,
4635 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4636 const auto *UBVar =
4637 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4638 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4639 /*IsInitializer=*/true);
4640 LValue StLVal = CGF.EmitLValueForField(
4641 Result.TDBase,
4642 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4643 const auto *StVar =
4644 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4645 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4646 /*IsInitializer=*/true);
4647 // Store reductions address.
4648 LValue RedLVal = CGF.EmitLValueForField(
4649 Result.TDBase,
4650 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4651 if (Data.Reductions) {
4652 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4653 } else {
4654 CGF.EmitNullInitialization(RedLVal.getAddress(),
4655 CGF.getContext().VoidPtrTy);
4656 }
4657 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4658 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4659 UpLoc,
4660 ThreadID,
4661 Result.NewTask,
4662 IfVal,
4663 LBLVal.getPointer(CGF),
4664 UBLVal.getPointer(CGF),
4665 CGF.EmitLoadOfScalar(StLVal, Loc),
4666 llvm::ConstantInt::getSigned(
4667 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4668 llvm::ConstantInt::getSigned(
4669 CGF.IntTy, Data.Schedule.getPointer()
4670 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4671 : NoSchedule),
4672 Data.Schedule.getPointer()
4673 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4674 /*isSigned=*/false)
4675 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4676 if (Data.HasModifier)
4677 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4678
4679 TaskArgs.push_back(Result.TaskDupFn
4680 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4681 Result.TaskDupFn, CGF.VoidPtrTy)
4682 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4683 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4684 CGM.getModule(), Data.HasModifier
4685 ? OMPRTL___kmpc_taskloop_5
4686 : OMPRTL___kmpc_taskloop),
4687 TaskArgs);
4688 }
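// Scheduling encoding used above (sketch): sched is 0 (no schedule clause),
// 1 (grainsize) or 2 (num_tasks). E.g. 'grainsize(4)' lowers to sched=1 with
// grainsize=4, and 'num_tasks(8)' to sched=2 with grainsize=8; the runtime
// reuses the grainsize argument slot to carry the task count.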
4689
4690 /// Emit reduction operation for each element of array (required for
4691 /// array sections) LHS op = RHS.
4692 /// \param Type Type of array.
4693 /// \param LHSVar Variable on the left side of the reduction operation
4694 /// (references element of array in original variable).
4695 /// \param RHSVar Variable on the right side of the reduction operation
4696 /// (references element of array in original variable).
4697 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4698 /// RHSVar.
4699 static void EmitOMPAggregateReduction(
4700 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4701 const VarDecl *RHSVar,
4702 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4703 const Expr *, const Expr *)> &RedOpGen,
4704 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4705 const Expr *UpExpr = nullptr) {
4706 // Perform element-by-element initialization.
4707 QualType ElementTy;
4708 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4709 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4710
4711 // Drill down to the base element type on both arrays.
4712 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4713 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4714
4715 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4716 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4717 // Compute the pointer past the end of the LHS array.
4718 llvm::Value *LHSEnd =
4719 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4720 // The basic structure here is a do-while loop guarded by an emptiness check.
4721 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4722 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4723 llvm::Value *IsEmpty =
4724 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4725 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4726
4727 // Enter the loop body, making that address the current address.
4728 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4729 CGF.EmitBlock(BodyBB);
4730
4731 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4732
4733 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4734 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4735 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4736 Address RHSElementCurrent(
4737 RHSElementPHI, RHSAddr.getElementType(),
4738 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4739
4740 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4741 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4742 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4743 Address LHSElementCurrent(
4744 LHSElementPHI, LHSAddr.getElementType(),
4745 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4746
4747 // Emit the reduction operation for the current pair of elements.
4748 CodeGenFunction::OMPPrivateScope Scope(CGF);
4749 Scope.addPrivate(LHSVar, LHSElementCurrent);
4750 Scope.addPrivate(RHSVar, RHSElementCurrent);
4751 Scope.Privatize();
4752 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4753 Scope.ForceCleanup();
4754
4755 // Shift the address forward by one element.
4756 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4757 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4758 "omp.arraycpy.dest.element");
4759 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4760 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4761 "omp.arraycpy.src.element");
4762 // Check whether we've reached the end.
4763 llvm::Value *Done =
4764 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4765 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4766 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4767 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4768
4769 // Done.
4770 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4771 }
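// Conceptually the emitted loop is (sketch):
//   T *lhs = LHSBegin, *rhs = RHSBegin, *end = LHSBegin + NumElements;
//   while (lhs != end) { <RedOpGen on *lhs, *rhs>; ++lhs; ++rhs; }
// with LHSVar/RHSVar privatized to the current elements inside the body.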
4772
4773 /// Emit reduction combiner. If the combiner is a simple expression, emit it
4774 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
4775 /// call to the UDR combiner function.
4776 static void emitReductionCombiner(CodeGenFunction &CGF,
4777 const Expr *ReductionOp) {
4778 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4779 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4780 if (const auto *DRE =
4781 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4782 if (const auto *DRD =
4783 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4784 std::pair<llvm::Function *, llvm::Function *> Reduction =
4785 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4786 RValue Func = RValue::get(Reduction.first);
4787 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4788 CGF.EmitIgnoredExpr(ReductionOp);
4789 return;
4790 }
4791 CGF.EmitIgnoredExpr(ReductionOp);
4792 }
4793
4794 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4795 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4796 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4797 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4798 ASTContext &C = CGM.getContext();
4799
4800 // void reduction_func(void *LHSArg, void *RHSArg);
4801 FunctionArgList Args;
4802 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4803 ImplicitParamKind::Other);
4804 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4805 ImplicitParamKind::Other);
4806 Args.push_back(&LHSArg);
4807 Args.push_back(&RHSArg);
4808 const auto &CGFI =
4809 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4810 std::string Name = getReductionFuncName(ReducerName);
4811 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4812 llvm::GlobalValue::InternalLinkage, Name,
4813 &CGM.getModule());
4814 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4815 Fn->setDoesNotRecurse();
4816 CodeGenFunction CGF(CGM);
4817 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4818
4819 // Dst = (void*[n])(LHSArg);
4820 // Src = (void*[n])(RHSArg);
4821 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4822 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4823 CGF.Builder.getPtrTy(0)),
4824 ArgsElemType, CGF.getPointerAlign());
4825 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4826 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4827 CGF.Builder.getPtrTy(0)),
4828 ArgsElemType, CGF.getPointerAlign());
4829
4830 // ...
4831 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4832 // ...
4833 CodeGenFunction::OMPPrivateScope Scope(CGF);
4834 const auto *IPriv = Privates.begin();
4835 unsigned Idx = 0;
4836 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4837 const auto *RHSVar =
4838 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4839 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4840 const auto *LHSVar =
4841 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4842 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4843 QualType PrivTy = (*IPriv)->getType();
4844 if (PrivTy->isVariablyModifiedType()) {
4845 // Get array size and emit VLA type.
4846 ++Idx;
4847 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4848 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4849 const VariableArrayType *VLA =
4850 CGF.getContext().getAsVariableArrayType(PrivTy);
4851 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4852 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4853 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4854 CGF.EmitVariablyModifiedType(PrivTy);
4855 }
4856 }
4857 Scope.Privatize();
4858 IPriv = Privates.begin();
4859 const auto *ILHS = LHSExprs.begin();
4860 const auto *IRHS = RHSExprs.begin();
4861 for (const Expr *E : ReductionOps) {
4862 if ((*IPriv)->getType()->isArrayType()) {
4863 // Emit reduction for array section.
4864 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4865 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4866 EmitOMPAggregateReduction(
4867 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4868 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4869 emitReductionCombiner(CGF, E);
4870 });
4871 } else {
4872 // Emit reduction for array subscript or single variable.
4873 emitReductionCombiner(CGF, E);
4874 }
4875 ++IPriv;
4876 ++ILHS;
4877 ++IRHS;
4878 }
4879 Scope.ForceCleanup();
4880 CGF.FinishFunction();
4881 return Fn;
4882 }
4883
4884 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4885 const Expr *ReductionOp,
4886 const Expr *PrivateRef,
4887 const DeclRefExpr *LHS,
4888 const DeclRefExpr *RHS) {
4889 if (PrivateRef->getType()->isArrayType()) {
4890 // Emit reduction for array section.
4891 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4892 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4893 EmitOMPAggregateReduction(
4894 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4895 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4896 emitReductionCombiner(CGF, ReductionOp);
4897 });
4898 } else {
4899 // Emit reduction for array subscript or single variable.
4900 emitReductionCombiner(CGF, ReductionOp);
4901 }
4902 }
4903
4904 static std::string generateUniqueName(CodeGenModule &CGM,
4905 llvm::StringRef Prefix, const Expr *Ref);
4906
4907 void CGOpenMPRuntime::emitPrivateReduction(
4908 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4909 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4910
4911 // Create a shared global variable (__shared_reduction_var) to accumulate the
4912 // final result.
4913 //
4914 // The master thread (thread_id == 0) initializes __shared_reduction_var
4915 // with the identity value or initializer.
4916 //
4917 // Call __kmpc_barrier so every thread waits until the initialization is
4918 // complete before combining.
4920 // For each i:
4921 // - Thread enters critical section.
4922 // - Reads its private value from LHSExprs[i].
4923 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4924 // Privates[i]).
4925 // - Exits critical section.
4926 //
4927 // Call __kmpc_barrier after combining.
4928 //
4929 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4930 //
4931 // Final __kmpc_barrier to synchronize after broadcasting.
4932 QualType PrivateType = Privates->getType();
4933 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4934
4935 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4936 std::string ReductionVarNameStr;
4937 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4938 ReductionVarNameStr =
4939 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4940 else
4941 ReductionVarNameStr = "unnamed_priv_var";
4942
4943 // Create an internal shared variable.
4944 std::string SharedName =
4945 CGM.getOpenMPRuntime().getName({"internal_private_", ReductionVarNameStr});
4946 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4947 LLVMType, ".omp.reduction." + SharedName);
4948
4949 SharedVar->setAlignment(
4950 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4951
4952 Address SharedResult =
4953 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4954
4955 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4956 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4957 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4958
4959 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4960 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4961
4962 llvm::Value *IsMaster = CGF.Builder.CreateICmpEQ(
4963 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4964 CGF.Builder.CreateCondBr(IsMaster, InitBB, InitEndBB);
4965
4966 CGF.EmitBlock(InitBB);
4967
4968 auto EmitSharedInit = [&]() {
4969 if (UDR) { // Check if it's a User-Defined Reduction
4970 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
4971 std::pair<llvm::Function *, llvm::Function *> FnPair =
4972 getUserDefinedReduction(UDR);
4973 llvm::Function *InitializerFn = FnPair.second;
4974 if (InitializerFn) {
4975 if (const auto *CE =
4976 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
4977 const auto *OutDRE = cast<DeclRefExpr>(
4978 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
4979 ->getSubExpr());
4980 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
4981
4982 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
4983 LocalScope.addPrivate(OutVD, SharedResult);
4984
4985 (void)LocalScope.Privatize();
4986 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
4987 CE->getCallee()->IgnoreParenImpCasts())) {
4988 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4989 CGF, OVE, RValue::get(InitializerFn));
4990 CGF.EmitIgnoredExpr(CE);
4991 } else {
4992 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
4993 PrivateType.getQualifiers(),
4994 /*IsInitializer=*/true);
4995 }
4996 } else {
4997 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
4998 PrivateType.getQualifiers(),
4999 /*IsInitializer=*/true);
5000 }
5001 } else {
5002 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5003 PrivateType.getQualifiers(),
5004 /*IsInitializer=*/true);
5005 }
5006 } else {
5007 // EmitNullInitialization handles default construction for C++ classes
5008 // and zeroing for scalars, which is a reasonable default.
5009 CGF.EmitNullInitialization(SharedResult, PrivateType);
5010 }
5011 return; // UDR initialization handled
5012 }
5013 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5014 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5015 if (const Expr *InitExpr = VD->getInit()) {
5016 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5017 PrivateType.getQualifiers(), /*IsInitializer=*/true);
5018 return;
5019 }
5020 }
5021 }
5022 CGF.EmitNullInitialization(SharedResult, PrivateType);
5023 };
5024 EmitSharedInit();
5025 CGF.Builder.CreateBr(InitEndBB);
5026 CGF.EmitBlock(InitEndBB);
5027
5028 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5029 CGM.getModule(), OMPRTL___kmpc_barrier),
5030 BarrierArgs);
5031
5032 const Expr *ReductionOp = ReductionOps;
5033 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5034 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5035 LValue LHSLV = CGF.EmitLValue(Privates);
5036
5037 auto EmitCriticalReduction = [&](auto ReductionGen) {
5038 std::string CriticalName = getName({"reduction_critical"});
5039 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5040 };
5041
5042 if (CurrentUDR) {
5043 // Handle user-defined reduction.
5044 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5045 Action.Enter(CGF);
5046 std::pair<llvm::Function *, llvm::Function *> FnPair =
5047 getUserDefinedReduction(CurrentUDR);
5048 if (FnPair.first) {
5049 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5050 const auto *OutDRE = cast<DeclRefExpr>(
5051 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5052 ->getSubExpr());
5053 const auto *InDRE = cast<DeclRefExpr>(
5054 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5055 ->getSubExpr());
5056 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5057 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5058 SharedLV.getAddress());
5059 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5060 LHSLV.getAddress());
5061 (void)LocalScope.Privatize();
5062 emitReductionCombiner(CGF, ReductionOp);
5063 }
5064 }
5065 };
5066 EmitCriticalReduction(ReductionGen);
5067 } else {
5068 // Handle built-in reduction operations.
5069 #ifndef NDEBUG
5070 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5071 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5072 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5073
5074 const Expr *AssignRHS = nullptr;
5075 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5076 if (BinOp->getOpcode() == BO_Assign)
5077 AssignRHS = BinOp->getRHS();
5078 } else if (const auto *OpCall =
5079 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5080 if (OpCall->getOperator() == OO_Equal)
5081 AssignRHS = OpCall->getArg(1);
5082 }
5083
5084 assert(AssignRHS &&
5085 "Private Variable Reduction : Invalid ReductionOp expression");
5086 #endif
5087
5088 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5089 Action.Enter(CGF);
5090 const auto *OmpOutDRE =
5091 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5092 const auto *OmpInDRE =
5093 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5094 assert(
5095 OmpOutDRE && OmpInDRE &&
5096 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5097 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5098 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5099 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5100 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5101 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5102 (void)LocalScope.Privatize();
5103 // Emit the actual reduction operation
5104 CGF.EmitIgnoredExpr(ReductionOp);
5105 };
5106 EmitCriticalReduction(ReductionGen);
5107 }
5108
5109 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5110 CGM.getModule(), OMPRTL___kmpc_barrier),
5111 BarrierArgs);
5112
5113 // Broadcast final result
5114 bool IsAggregate = PrivateType->isAggregateType();
5115 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5116 llvm::Value *FinalResultVal = nullptr;
5117 Address FinalResultAddr = Address::invalid();
5118
5119 if (IsAggregate)
5120 FinalResultAddr = SharedResult;
5121 else
5122 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5123
5124 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5125 if (IsAggregate) {
5126 CGF.EmitAggregateCopy(TargetLHSLV,
5127 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5128 PrivateType, AggValueSlot::DoesNotOverlap, false);
5129 } else {
5130 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5131 }
5132 // Final synchronization barrier
5133 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5134 CGM.getModule(), OMPRTL___kmpc_barrier),
5135 BarrierArgs);
5136
5137 // Combiner with original list item
5138 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5139 PrePostActionTy &Action) {
5140 Action.Enter(CGF);
5141 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5142 cast<DeclRefExpr>(LHSExprs),
5143 cast<DeclRefExpr>(RHSExprs));
5144 };
5145 EmitCriticalReduction(OriginalListCombiner);
5146 }
5147
5148 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5149 ArrayRef<const Expr *> OrgPrivates,
5150 ArrayRef<const Expr *> OrgLHSExprs,
5151 ArrayRef<const Expr *> OrgRHSExprs,
5152 ArrayRef<const Expr *> OrgReductionOps,
5153 ReductionOptionsTy Options) {
5154 if (!CGF.HaveInsertPoint())
5155 return;
5156
5157 bool WithNowait = Options.WithNowait;
5158 bool SimpleReduction = Options.SimpleReduction;
5159
5160 // The following code should be emitted for a reduction:
5161 //
5162 // static kmp_critical_name lock = { 0 };
5163 //
5164 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5165 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5166 // ...
5167 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5168 // *(Type<n>-1*)rhs[<n>-1]);
5169 // }
5170 //
5171 // ...
5172 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5173 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5174 // RedList, reduce_func, &<lock>)) {
5175 // case 1:
5176 // ...
5177 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5178 // ...
5179 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5180 // break;
5181 // case 2:
5182 // ...
5183 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5184 // ...
5185 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5186 // break;
5187 // default:;
5188 // }
5189 //
5190 // If SimpleReduction is true, only the following code is generated:
5191 // ...
5192 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5193 // ...
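//
// As an illustration (hypothetical user code, not part of this file), a
// directive such as
//
//   int sum = 0;
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < 100; ++i)
//     sum += i;
//
// is lowered through the __kmpc_reduce{_nowait} switch sketched above, with
// RedOp0 being '+' on int.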
5194
5195 ASTContext &C = CGM.getContext();
5196
5197 if (SimpleReduction) {
5198 CodeGenFunction::RunCleanupsScope Scope(CGF);
5199 const auto *IPriv = OrgPrivates.begin();
5200 const auto *ILHS = OrgLHSExprs.begin();
5201 const auto *IRHS = OrgRHSExprs.begin();
5202 for (const Expr *E : OrgReductionOps) {
5203 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5204 cast<DeclRefExpr>(*IRHS));
5205 ++IPriv;
5206 ++ILHS;
5207 ++IRHS;
5208 }
5209 return;
5210 }
5211
5212 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5213 // Only keep entries where the corresponding variable is not private.
5214 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5215 FilteredRHSExprs, FilteredReductionOps;
5216 for (unsigned I : llvm::seq<unsigned>(
5217 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5218 if (!Options.IsPrivateVarReduction[I]) {
5219 FilteredPrivates.emplace_back(OrgPrivates[I]);
5220 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5221 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5222 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5223 }
5224 }
5225 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5226 // processing.
5227 ArrayRef<const Expr *> Privates = FilteredPrivates;
5228 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5229 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5230 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5231
5232 // 1. Build a list of reduction variables.
5233 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5234 auto Size = RHSExprs.size();
5235 for (const Expr *E : Privates) {
5236 if (E->getType()->isVariablyModifiedType())
5237 // Reserve place for array size.
5238 ++Size;
5239 }
5240 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5241 QualType ReductionArrayTy = C.getConstantArrayType(
5242 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5243 /*IndexTypeQuals=*/0);
5244 RawAddress ReductionList =
5245 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5246 const auto *IPriv = Privates.begin();
5247 unsigned Idx = 0;
5248 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5249 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5250 CGF.Builder.CreateStore(
5251 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5252 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5253 Elem);
5254 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5255 // Store array size.
5256 ++Idx;
5257 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5258 llvm::Value *Size = CGF.Builder.CreateIntCast(
5259 CGF.getVLASize(
5260 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5261 .NumElts,
5262 CGF.SizeTy, /*isSigned=*/false);
5263 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5264 Elem);
5265 }
5266 }
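// Illustrative layout (assumed, not emitted verbatim): for reduction items
// {int x; int a[n];} where 'a' is a VLA, RedList gets three slots,
// { &x_rhs, &a_rhs, (void *)n }; the extra slot carries the element count so
// reduce_func can recover the VLA size at run time.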
5267
5268 // 2. Emit reduce_func().
5269 llvm::Function *ReductionFn = emitReductionFunction(
5270 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5271 Privates, LHSExprs, RHSExprs, ReductionOps);
5272
5273 // 3. Create static kmp_critical_name lock = { 0 };
5274 std::string Name = getName({"reduction"});
5275 llvm::Value *Lock = getCriticalRegionLock(Name);
5276
5277 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5278 // RedList, reduce_func, &<lock>);
5279 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5280 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5281 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5282 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5283 ReductionList.getPointer(), CGF.VoidPtrTy);
5284 llvm::Value *Args[] = {
5285 IdentTLoc, // ident_t *<loc>
5286 ThreadId, // i32 <gtid>
5287 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5288 ReductionArrayTySize, // size_type sizeof(RedList)
5289 RL, // void *RedList
5290 ReductionFn, // void (*) (void *, void *) <reduce_func>
5291 Lock // kmp_critical_name *&<lock>
5292 };
5293 llvm::Value *Res = CGF.EmitRuntimeCall(
5294 OMPBuilder.getOrCreateRuntimeFunction(
5295 CGM.getModule(),
5296 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5297 Args);
5298
5299 // 5. Build switch(res)
5300 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5301 llvm::SwitchInst *SwInst =
5302 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5303
5304 // 6. Build case 1:
5305 // ...
5306 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5307 // ...
5308 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5309 // break;
5310 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5311 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5312 CGF.EmitBlock(Case1BB);
5313
5314 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5315 llvm::Value *EndArgs[] = {
5316 IdentTLoc, // ident_t *<loc>
5317 ThreadId, // i32 <gtid>
5318 Lock // kmp_critical_name *&<lock>
5319 };
5320 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5321 CodeGenFunction &CGF, PrePostActionTy &Action) {
5322 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5323 const auto *IPriv = Privates.begin();
5324 const auto *ILHS = LHSExprs.begin();
5325 const auto *IRHS = RHSExprs.begin();
5326 for (const Expr *E : ReductionOps) {
5327 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5328 cast<DeclRefExpr>(*IRHS));
5329 ++IPriv;
5330 ++ILHS;
5331 ++IRHS;
5332 }
5333 };
5334 RegionCodeGenTy RCG(CodeGen);
5335 CommonActionTy Action(
5336 nullptr, {},
5337 OMPBuilder.getOrCreateRuntimeFunction(
5338 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5339 : OMPRTL___kmpc_end_reduce),
5340 EndArgs);
5341 RCG.setAction(Action);
5342 RCG(CGF);
5343
5344 CGF.EmitBranch(DefaultBB);
5345
5346 // 7. Build case 2:
5347 // ...
5348 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5349 // ...
5350 // break;
5351 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5352 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5353 CGF.EmitBlock(Case2BB);
5354
5355 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5356 CodeGenFunction &CGF, PrePostActionTy &Action) {
5357 const auto *ILHS = LHSExprs.begin();
5358 const auto *IRHS = RHSExprs.begin();
5359 const auto *IPriv = Privates.begin();
5360 for (const Expr *E : ReductionOps) {
5361 const Expr *XExpr = nullptr;
5362 const Expr *EExpr = nullptr;
5363 const Expr *UpExpr = nullptr;
5364 BinaryOperatorKind BO = BO_Comma;
5365 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5366 if (BO->getOpcode() == BO_Assign) {
5367 XExpr = BO->getLHS();
5368 UpExpr = BO->getRHS();
5369 }
5370 }
5371 // Try to emit update expression as a simple atomic.
5372 const Expr *RHSExpr = UpExpr;
5373 if (RHSExpr) {
5374 // Analyze RHS part of the whole expression.
5375 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5376 RHSExpr->IgnoreParenImpCasts())) {
5377 // If this is a conditional operator, analyze its condition for
5378 // min/max reduction operator.
5379 RHSExpr = ACO->getCond();
5380 }
5381 if (const auto *BORHS =
5382 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5383 EExpr = BORHS->getRHS();
5384 BO = BORHS->getOpcode();
5385 }
5386 }
5387 if (XExpr) {
5388 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5389 auto &&AtomicRedGen = [BO, VD,
5390 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5391 const Expr *EExpr, const Expr *UpExpr) {
5392 LValue X = CGF.EmitLValue(XExpr);
5393 RValue E;
5394 if (EExpr)
5395 E = CGF.EmitAnyExpr(EExpr);
5396 CGF.EmitOMPAtomicSimpleUpdateExpr(
5397 X, E, BO, /*IsXLHSInRHSPart=*/true,
5398 llvm::AtomicOrdering::Monotonic, Loc,
5399 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5400 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5401 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5402 CGF.emitOMPSimpleStore(
5403 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5404 VD->getType().getNonReferenceType(), Loc);
5405 PrivateScope.addPrivate(VD, LHSTemp);
5406 (void)PrivateScope.Privatize();
5407 return CGF.EmitAnyExpr(UpExpr);
5408 });
5409 };
5410 if ((*IPriv)->getType()->isArrayType()) {
5411 // Emit atomic reduction for array section.
5412 const auto *RHSVar =
5413 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5414 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5415 AtomicRedGen, XExpr, EExpr, UpExpr);
5416 } else {
5417 // Emit atomic reduction for array subscript or single variable.
5418 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5419 }
5420 } else {
5421 // Emit as a critical region.
5422 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5423 const Expr *, const Expr *) {
5424 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5425 std::string Name = RT.getName({"atomic_reduction"});
5426 RT.emitCriticalRegion(
5427 CGF, Name,
5428 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5429 Action.Enter(CGF);
5430 emitReductionCombiner(CGF, E);
5431 },
5432 Loc);
5433 };
5434 if ((*IPriv)->getType()->isArrayType()) {
5435 const auto *LHSVar =
5436 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5437 const auto *RHSVar =
5438 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5439 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5440 CritRedGen);
5441 } else {
5442 CritRedGen(CGF, nullptr, nullptr, nullptr);
5443 }
5444 }
5445 ++ILHS;
5446 ++IRHS;
5447 ++IPriv;
5448 }
5449 };
5450 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5451 if (!WithNowait) {
5452 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5453 llvm::Value *EndArgs[] = {
5454 IdentTLoc, // ident_t *<loc>
5455 ThreadId, // i32 <gtid>
5456 Lock // kmp_critical_name *&<lock>
5457 };
5458 CommonActionTy Action(nullptr, {},
5459 OMPBuilder.getOrCreateRuntimeFunction(
5460 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5461 EndArgs);
5462 AtomicRCG.setAction(Action);
5463 AtomicRCG(CGF);
5464 } else {
5465 AtomicRCG(CGF);
5466 }
5467
5468 CGF.EmitBranch(DefaultBB);
5469 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5470 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5471 "PrivateVarReduction: Privates size mismatch");
5472 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5473 "PrivateVarReduction: ReductionOps size mismatch");
5474 for (unsigned I : llvm::seq<unsigned>(
5475 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5476 if (Options.IsPrivateVarReduction[I])
5477 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5478 OrgRHSExprs[I], OrgReductionOps[I]);
5479 }
5480 }
5481
5482 /// Generates unique name for artificial threadprivate variables.
5483 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5484 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5485 const Expr *Ref) {
5486 SmallString<256> Buffer;
5487 llvm::raw_svector_ostream Out(Buffer);
5488 const clang::DeclRefExpr *DE;
5489 const VarDecl *D = ::getBaseDecl(Ref, DE);
5490 if (!D)
5491 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5492 D = D->getCanonicalDecl();
5493 std::string Name = CGM.getOpenMPRuntime().getName(
5494 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5495 Out << Prefix << Name << "_"
5496 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5497 return std::string(Out.str());
5498 }
5499
5500 /// Emits reduction initializer function:
5501 /// \code
5502 /// void @.red_init(void* %arg, void* %orig) {
5503 /// %0 = bitcast void* %arg to <type>*
5504 /// store <type> <init>, <type>* %0
5505 /// ret void
5506 /// }
5507 /// \endcode
5508 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5509 SourceLocation Loc,
5510 ReductionCodeGen &RCG, unsigned N) {
5511 ASTContext &C = CGM.getContext();
5512 QualType VoidPtrTy = C.VoidPtrTy;
5513 VoidPtrTy.addRestrict();
5514 FunctionArgList Args;
5515 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5516 ImplicitParamKind::Other);
5517 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5518 ImplicitParamKind::Other);
5519 Args.emplace_back(&Param);
5520 Args.emplace_back(&ParamOrig);
5521 const auto &FnInfo =
5522 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5523 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5524 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5525 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5526 Name, &CGM.getModule());
5527 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5528 Fn->setDoesNotRecurse();
5529 CodeGenFunction CGF(CGM);
5530 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5531 QualType PrivateType = RCG.getPrivateType(N);
5532 Address PrivateAddr = CGF.EmitLoadOfPointer(
5533 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5534 C.getPointerType(PrivateType)->castAs<PointerType>());
5535 llvm::Value *Size = nullptr;
5536 // If the size of the reduction item is non-constant, load it from the
5537 // global threadprivate variable.
5538 if (RCG.getSizes(N).second) {
5539 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5540 CGF, CGM.getContext().getSizeType(),
5541 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5542 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5543 CGM.getContext().getSizeType(), Loc);
5544 }
5545 RCG.emitAggregateType(CGF, N, Size);
5546 Address OrigAddr = Address::invalid();
5547 // If the initializer uses the initializer from the declare reduction
5548 // construct, emit a pointer to the address of the original reduction item
5549 // (required by the reduction initializer).
5550 if (RCG.usesReductionInitializer(N)) {
5551 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5552 OrigAddr = CGF.EmitLoadOfPointer(
5553 SharedAddr,
5554 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5555 }
5556 // Emit the initializer:
5557 // %0 = bitcast void* %arg to <type>*
5558 // store <type> <init>, <type>* %0
5559 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5560 [](CodeGenFunction &) { return false; });
5561 CGF.FinishFunction();
5562 return Fn;
5563 }
5564
5565 /// Emits reduction combiner function:
5566 /// \code
5567 /// void @.red_comb(void* %arg0, void* %arg1) {
5568 /// %lhs = bitcast void* %arg0 to <type>*
5569 /// %rhs = bitcast void* %arg1 to <type>*
5570 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5571 /// store <type> %2, <type>* %lhs
5572 /// ret void
5573 /// }
5574 /// \endcode
5575 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5576 SourceLocation Loc,
5577 ReductionCodeGen &RCG, unsigned N,
5578 const Expr *ReductionOp,
5579 const Expr *LHS, const Expr *RHS,
5580 const Expr *PrivateRef) {
5581 ASTContext &C = CGM.getContext();
5582 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5583 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5584 FunctionArgList Args;
5585 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5586 C.VoidPtrTy, ImplicitParamKind::Other);
5587 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5588 ImplicitParamKind::Other);
5589 Args.emplace_back(&ParamInOut);
5590 Args.emplace_back(&ParamIn);
5591 const auto &FnInfo =
5592 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5593 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5594 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5595 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5596 Name, &CGM.getModule());
5597 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5598 Fn->setDoesNotRecurse();
5599 CodeGenFunction CGF(CGM);
5600 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5601 llvm::Value *Size = nullptr;
5602 // If the size of the reduction item is non-constant, load it from the
5603 // global threadprivate variable.
5604 if (RCG.getSizes(N).second) {
5605 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5606 CGF, CGM.getContext().getSizeType(),
5607 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5608 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5609 CGM.getContext().getSizeType(), Loc);
5610 }
5611 RCG.emitAggregateType(CGF, N, Size);
5612 // Remap lhs and rhs variables to the addresses of the function arguments.
5613 // %lhs = bitcast void* %arg0 to <type>*
5614 // %rhs = bitcast void* %arg1 to <type>*
5615 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5616 PrivateScope.addPrivate(
5617 LHSVD,
5618 // Pull out the pointer to the variable.
5619 CGF.EmitLoadOfPointer(
5620 CGF.GetAddrOfLocalVar(&ParamInOut)
5621 .withElementType(CGF.Builder.getPtrTy(0)),
5622 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5623 PrivateScope.addPrivate(
5624 RHSVD,
5625 // Pull out the pointer to the variable.
5626 CGF.EmitLoadOfPointer(
5627 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5628 CGF.Builder.getPtrTy(0)),
5629 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5630 PrivateScope.Privatize();
5631 // Emit the combiner body:
5632 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5633 // store <type> %2, <type>* %lhs
5634 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5635 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5636 cast<DeclRefExpr>(RHS));
5637 CGF.FinishFunction();
5638 return Fn;
5639 }
5640
5641 /// Emits reduction finalizer function:
5642 /// \code
5643 /// void @.red_fini(void* %arg) {
5644 /// %0 = bitcast void* %arg to <type>*
5645 /// <destroy>(<type>* %0)
5646 /// ret void
5647 /// }
5648 /// \endcode
5649 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5650 SourceLocation Loc,
5651 ReductionCodeGen &RCG, unsigned N) {
5652 if (!RCG.needCleanups(N))
5653 return nullptr;
5654 ASTContext &C = CGM.getContext();
5655 FunctionArgList Args;
5656 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5657 ImplicitParamKind::Other);
5658 Args.emplace_back(&Param);
5659 const auto &FnInfo =
5660 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5661 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5662 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5663 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5664 Name, &CGM.getModule());
5665 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5666 Fn->setDoesNotRecurse();
5667 CodeGenFunction CGF(CGM);
5668 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5669 Address PrivateAddr = CGF.EmitLoadOfPointer(
5670 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5671 llvm::Value *Size = nullptr;
5672 // If the size of the reduction item is non-constant, load it from the
5673 // global threadprivate variable.
5674 if (RCG.getSizes(N).second) {
5675 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5676 CGF, CGM.getContext().getSizeType(),
5677 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5678 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5679 CGM.getContext().getSizeType(), Loc);
5680 }
5681 RCG.emitAggregateType(CGF, N, Size);
5682 // Emit the finalizer body:
5683 // <destroy>(<type>* %0)
5684 RCG.emitCleanups(CGF, N, PrivateAddr);
5685 CGF.FinishFunction(Loc);
5686 return Fn;
5687 }
5688
5689 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5690 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5691 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5692 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5693 return nullptr;
5694
5695 // Build typedef struct:
5696 // kmp_taskred_input {
5697 // void *reduce_shar; // shared reduction item
5698 // void *reduce_orig; // original reduction item used for initialization
5699 // size_t reduce_size; // size of data item
5700 // void *reduce_init; // data initialization routine
5701 // void *reduce_fini; // data finalization routine
5702 // void *reduce_comb; // data combiner routine
5703 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5704 // } kmp_taskred_input_t;
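// As a hypothetical example (not from this file), a construct such as
//
//   #pragma omp taskgroup task_reduction(+ : sum)
//   { /* tasks using in_reduction(+ : sum) */ }
//
// reaches this point and fills one kmp_taskred_input_t entry per reduction
// item before calling the taskred init entry point below.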
5705 ASTContext &C = CGM.getContext();
5706 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5707 RD->startDefinition();
5708 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5709 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5710 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5711 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5712 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5713 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5714 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5715 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5716 RD->completeDefinition();
5717 QualType RDType = C.getRecordType(RD);
5718 unsigned Size = Data.ReductionVars.size();
5719 llvm::APInt ArraySize(/*numBits=*/64, Size);
5720 QualType ArrayRDType =
5721 C.getConstantArrayType(RDType, ArraySize, nullptr,
5722 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5723 // kmp_task_red_input_t .rd_input.[Size];
5724 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5725 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5726 Data.ReductionCopies, Data.ReductionOps);
5727 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5728 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5729 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5730 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5731 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5732 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5733 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5734 ".rd_input.gep.");
5735 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5736 // ElemLVal.reduce_shar = &Shareds[Cnt];
5737 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5738 RCG.emitSharedOrigLValue(CGF, Cnt);
5739 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5740 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5741 // ElemLVal.reduce_orig = &Origs[Cnt];
5742 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5743 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5744 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5745 RCG.emitAggregateType(CGF, Cnt);
5746 llvm::Value *SizeValInChars;
5747 llvm::Value *SizeVal;
5748 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5749 // We use delayed creation/initialization for VLAs and array sections. It is
5750 // required because the runtime does not provide a way to pass the sizes of
5751 // VLAs/array sections to the initializer/combiner/finalizer functions.
5752 // Instead, threadprivate global variables are used to store these values and
5753 // the functions read them from there.
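// For instance (illustrative only), 'task_reduction(+ : a[0:n])' on an array
// section takes this path because the element count 'n' is known only at run
// time.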
5754 bool DelayedCreation = !!SizeVal;
5755 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5756 /*isSigned=*/false);
5757 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5758 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5759 // ElemLVal.reduce_init = init;
5760 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5761 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5762 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5763 // ElemLVal.reduce_fini = fini;
5764 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5765 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5766 llvm::Value *FiniAddr =
5767 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5768 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5769 // ElemLVal.reduce_comb = comb;
5770 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5771 llvm::Value *CombAddr = emitReduceCombFunction(
5772 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5773 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5774 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5775 // ElemLVal.flags = 0;
5776 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5777 if (DelayedCreation) {
5778 CGF.EmitStoreOfScalar(
5779 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5780 FlagsLVal);
5781 } else
5782 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5783 }
5784 if (Data.IsReductionWithTaskMod) {
5785 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5786 // is_ws, int num, void *data);
5787 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5788 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5789 CGM.IntTy, /*isSigned=*/true);
5790 llvm::Value *Args[] = {
5791 IdentTLoc, GTid,
5792 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5793 /*isSigned=*/true),
5794 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5795 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5796 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5797 return CGF.EmitRuntimeCall(
5798 OMPBuilder.getOrCreateRuntimeFunction(
5799 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5800 Args);
5801 }
5802 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5803 llvm::Value *Args[] = {
5804 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5805 /*isSigned=*/true),
5806 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5807 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5808 CGM.VoidPtrTy)};
5809 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5810 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5811 Args);
5812 }
5813
5814 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5815 SourceLocation Loc,
5816 bool IsWorksharingReduction) {
5817 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5818 // int is_ws);
5819 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5820 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5821 CGM.IntTy, /*isSigned=*/true);
5822 llvm::Value *Args[] = {IdentTLoc, GTid,
5823 llvm::ConstantInt::get(CGM.IntTy,
5824 IsWorksharingReduction ? 1 : 0,
5825 /*isSigned=*/true)};
5826 (void)CGF.EmitRuntimeCall(
5827 OMPBuilder.getOrCreateRuntimeFunction(
5828 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5829 Args);
5830 }
5831
5832 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5833 SourceLocation Loc,
5834 ReductionCodeGen &RCG,
5835 unsigned N) {
5836 auto Sizes = RCG.getSizes(N);
5837 // Emit the threadprivate global variable if the size is non-constant
5838 // (i.e. Sizes.second != nullptr).
5839 if (Sizes.second) {
5840 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5841 /*isSigned=*/false);
5842 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5843 CGF, CGM.getContext().getSizeType(),
5844 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5845 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5846 }
5847 }
5848
5849 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5850 SourceLocation Loc,
5851 llvm::Value *ReductionsPtr,
5852 LValue SharedLVal) {
5853 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5854 // *d);
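// The returned address designates the calling thread's private copy of the
// reduction item; e.g. (illustrative) inside a task with
// 'in_reduction(+ : sum)', accesses to 'sum' are routed through this call.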
5855 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5856 CGM.IntTy,
5857 /*isSigned=*/true),
5858 ReductionsPtr,
5859 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5860 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5861 return Address(
5862 CGF.EmitRuntimeCall(
5863 OMPBuilder.getOrCreateRuntimeFunction(
5864 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5865 Args),
5866 CGF.Int8Ty, SharedLVal.getAlignment());
5867 }
5868
5869 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5870 const OMPTaskDataTy &Data) {
5871 if (!CGF.HaveInsertPoint())
5872 return;
5873
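// Illustrative source (not part of this file): '#pragma omp taskwait
// depend(in : x)' takes the __kmpc_omp_taskwait_deps_51 path below, while a
// plain '#pragma omp taskwait' lowers to __kmpc_omp_taskwait (or to the
// OpenMPIRBuilder's createTaskwait when that builder is enabled).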
5874 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5875 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5876 OMPBuilder.createTaskwait(CGF.Builder);
5877 } else {
5878 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5879 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5880 auto &M = CGM.getModule();
5881 Address DependenciesArray = Address::invalid();
5882 llvm::Value *NumOfElements;
5883 std::tie(NumOfElements, DependenciesArray) =
5884 emitDependClause(CGF, Data.Dependences, Loc);
5885 if (!Data.Dependences.empty()) {
5886 llvm::Value *DepWaitTaskArgs[7];
5887 DepWaitTaskArgs[0] = UpLoc;
5888 DepWaitTaskArgs[1] = ThreadID;
5889 DepWaitTaskArgs[2] = NumOfElements;
5890 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5891 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5892 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5893 DepWaitTaskArgs[6] =
5894 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5895
5896 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5897
5898 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5899 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5900 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5901 // kmp_int32 has_no_wait); if dependence info is specified.
5902 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5903 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5904 DepWaitTaskArgs);
5905
5906 } else {
5907
5908 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5909 // global_tid);
5910 llvm::Value *Args[] = {UpLoc, ThreadID};
5911 // Ignore return result until untied tasks are supported.
5912 CGF.EmitRuntimeCall(
5913 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5914 Args);
5915 }
5916 }
5917
5918 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5919 Region->emitUntiedSwitch(CGF);
5920 }
5921
5922 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5923 OpenMPDirectiveKind InnerKind,
5924 const RegionCodeGenTy &CodeGen,
5925 bool HasCancel) {
5926 if (!CGF.HaveInsertPoint())
5927 return;
5928 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5929 InnerKind != OMPD_critical &&
5930 InnerKind != OMPD_master &&
5931 InnerKind != OMPD_masked);
5932 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5933 }
5934
5935 namespace {
5936 enum RTCancelKind {
5937 CancelNoreq = 0,
5938 CancelParallel = 1,
5939 CancelLoop = 2,
5940 CancelSections = 3,
5941 CancelTaskgroup = 4
5942 };
5943 } // anonymous namespace
5944
5945 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5946 RTCancelKind CancelKind = CancelNoreq;
5947 if (CancelRegion == OMPD_parallel)
5948 CancelKind = CancelParallel;
5949 else if (CancelRegion == OMPD_for)
5950 CancelKind = CancelLoop;
5951 else if (CancelRegion == OMPD_sections)
5952 CancelKind = CancelSections;
5953 else {
5954 assert(CancelRegion == OMPD_taskgroup);
5955 CancelKind = CancelTaskgroup;
5956 }
5957 return CancelKind;
5958 }
5959
5960 void CGOpenMPRuntime::emitCancellationPointCall(
5961 CodeGenFunction &CGF, SourceLocation Loc,
5962 OpenMPDirectiveKind CancelRegion) {
5963 if (!CGF.HaveInsertPoint())
5964 return;
5965 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5966 // global_tid, kmp_int32 cncl_kind);
5967 if (auto *OMPRegionInfo =
5968 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5969 // For 'cancellation point taskgroup', the task region info may not have a
5970 // cancel. This may instead happen in another adjacent task.
5971 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5972 llvm::Value *Args[] = {
5973 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5974 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5975 // Ignore return result until untied tasks are supported.
5976 llvm::Value *Result = CGF.EmitRuntimeCall(
5977 OMPBuilder.getOrCreateRuntimeFunction(
5978 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5979 Args);
5980 // if (__kmpc_cancellationpoint()) {
5981 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5982 // exit from construct;
5983 // }
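// E.g. (hypothetical user code): '#pragma omp cancellation point for' inside
// a cancellable loop emits this call with cncl_kind == CancelLoop and
// branches to the loop's cancel destination when the result is non-zero.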
5984 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5985 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5986 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5987 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5988 CGF.EmitBlock(ExitBB);
5989 if (CancelRegion == OMPD_parallel)
5990 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5991 // exit from construct;
5992 CodeGenFunction::JumpDest CancelDest =
5993 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5994 CGF.EmitBranchThroughCleanup(CancelDest);
5995 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5996 }
5997 }
5998 }
5999
6000 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6001 const Expr *IfCond,
6002 OpenMPDirectiveKind CancelRegion) {
6003 if (!CGF.HaveInsertPoint())
6004 return;
6005 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6006 // kmp_int32 cncl_kind);
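// E.g. (illustrative): '#pragma omp cancel parallel if(done)' emits this call
// guarded by 'done', with cncl_kind == CancelParallel.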
6007 auto &M = CGM.getModule();
6008 if (auto *OMPRegionInfo =
6009 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6010 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6011 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6012 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6013 llvm::Value *Args[] = {
6014 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6015 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6016 // Ignore return result until untied tasks are supported.
6017 llvm::Value *Result = CGF.EmitRuntimeCall(
6018 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6019 // if (__kmpc_cancel()) {
6020 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6021 // exit from construct;
6022 // }
6023 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6024 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6025 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6026 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6027 CGF.EmitBlock(ExitBB);
6028 if (CancelRegion == OMPD_parallel)
6029 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6030 // exit from construct;
6031 CodeGenFunction::JumpDest CancelDest =
6032 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6033 CGF.EmitBranchThroughCleanup(CancelDest);
6034 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6035 };
6036 if (IfCond) {
6037 emitIfClause(CGF, IfCond, ThenGen,
6038 [](CodeGenFunction &, PrePostActionTy &) {});
6039 } else {
6040 RegionCodeGenTy ThenRCG(ThenGen);
6041 ThenRCG(CGF);
6042 }
6043 }
6044 }
6045
6046 namespace {
6047 /// Cleanup action for uses_allocators support.
6048 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6049 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6050
6051 public:
6052 OMPUsesAllocatorsActionTy(
6053 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6054 : Allocators(Allocators) {}
6055 void Enter(CodeGenFunction &CGF) override {
6056 if (!CGF.HaveInsertPoint())
6057 return;
6058 for (const auto &AllocatorData : Allocators) {
6059 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6060 CGF, AllocatorData.first, AllocatorData.second);
6061 }
6062 }
6063 void Exit(CodeGenFunction &CGF) override {
6064 if (!CGF.HaveInsertPoint())
6065 return;
6066 for (const auto &AllocatorData : Allocators) {
6067 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6068 AllocatorData.first);
6069 }
6070 }
6071 };
6072 } // namespace
6073
6074 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6075 const OMPExecutableDirective &D, StringRef ParentName,
6076 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6077 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6078 assert(!ParentName.empty() && "Invalid target entry parent name!");
6079 HasEmittedTargetRegion = true;
6080 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6081 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6082 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6083 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6084 if (!D.AllocatorTraits)
6085 continue;
6086 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6087 }
6088 }
6089 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6090 CodeGen.setAction(UsesAllocatorAction);
6091 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6092 IsOffloadEntry, CodeGen);
6093 }
6094
6095 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6096 const Expr *Allocator,
6097 const Expr *AllocatorTraits) {
6098 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6099 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6100 // Use default memspace handle.
6101 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6102 llvm::Value *NumTraits = llvm::ConstantInt::get(
6103 CGF.IntTy, cast<ConstantArrayType>(
6104 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6105 ->getSize()
6106 .getLimitedValue());
6107 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6108 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6109 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6110 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6111 AllocatorTraitsLVal.getBaseInfo(),
6112 AllocatorTraitsLVal.getTBAAInfo());
6113 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6114
6115 llvm::Value *AllocatorVal =
6116 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6117 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6118 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6119 // Store to allocator.
6120 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6121 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6122 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6123 AllocatorVal =
6124 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6125 Allocator->getType(), Allocator->getExprLoc());
6126 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6127 }
6128
6129 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6130 const Expr *Allocator) {
6131 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6132 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6133 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6134 llvm::Value *AllocatorVal =
6135 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6136 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6137 CGF.getContext().VoidPtrTy,
6138 Allocator->getExprLoc());
6139 (void)CGF.EmitRuntimeCall(
6140 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6141 OMPRTL___kmpc_destroy_allocator),
6142 {ThreadId, AllocatorVal});
6143 }
6144
6145 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6146 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6147 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6148 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6149 "invalid default attrs structure");
6150 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6151 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6152
6153 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6154 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6155 /*UpperBoundOnly=*/true);
6156
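// E.g. (illustrative): an 'ompx_attribute(__launch_bounds__(128, 2))' clause
// clamps MaxThreadsVal to at most 128 and raises Attrs.MinTeams to at least 2
// in the loop below.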
6157 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6158 for (auto *A : C->getAttrs()) {
6159 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6160 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6161 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6162 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6163 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6164 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6165 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6166 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6167 &AttrMaxThreadsVal);
6168 else
6169 continue;
6170
6171 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6172 if (AttrMaxThreadsVal > 0)
6173 MaxThreadsVal = MaxThreadsVal > 0
6174 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6175 : AttrMaxThreadsVal;
6176 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6177 if (AttrMaxBlocksVal > 0)
6178 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6179 : AttrMaxBlocksVal;
6180 }
6181 }
6182 }
6183
6184 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6185 const OMPExecutableDirective &D, StringRef ParentName,
6186 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6187 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6188
6189 llvm::TargetRegionEntryInfo EntryInfo =
6190 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6191
6192 CodeGenFunction CGF(CGM, true);
6193 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6194 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6195 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6196
6197 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6198 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6200 };
6201
6202 cantFail(OMPBuilder.emitTargetRegionFunction(
6203 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6204 OutlinedFnID));
6205
6206 if (!OutlinedFn)
6207 return;
6208
6209 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6210
6211 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6212 for (auto *A : C->getAttrs()) {
6213 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6214 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6215 }
6216 }
6217 }
6218
6219 /// Checks if the expression is constant or does not have non-trivial function
6220 /// calls.
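/// For example (illustrative), 'x + 1' or 'sizeof(T)' is trivial, while a call
/// to an arbitrary user function with possible side effects is not.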
6221 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6222 // We can skip constant expressions.
6223 // We can skip expressions with trivial calls or simple expressions.
6224 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6225 !E->hasNonTrivialCall(Ctx)) &&
6226 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6227 }
6228
6229 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6230 const Stmt *Body) {
6231 const Stmt *Child = Body->IgnoreContainers();
6232 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6233 Child = nullptr;
6234 for (const Stmt *S : C->body()) {
6235 if (const auto *E = dyn_cast<Expr>(S)) {
6236 if (isTrivial(Ctx, E))
6237 continue;
6238 }
6239 // Some of the statements can be ignored.
6240 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6241 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6242 continue;
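// E.g. (illustrative): for a body '{ ; __asm__(""); #pragma omp teams ... }'
// the teams directive is still returned as the single child, since the null
// statement and the asm statement are skipped here.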
6243 // Analyze declarations.
6244 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6245 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6246 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6247 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6248 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6249 isa<UsingDirectiveDecl>(D) ||
6250 isa<OMPDeclareReductionDecl>(D) ||
6251 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6252 return true;
6253 const auto *VD = dyn_cast<VarDecl>(D);
6254 if (!VD)
6255 return false;
6256 return VD->hasGlobalStorage() || !VD->isUsed();
6257 }))
6258 continue;
6259 }
6260 // Found multiple children - cannot get the one child only.
6261 if (Child)
6262 return nullptr;
6263 Child = S;
6264 }
6265 if (Child)
6266 Child = Child->IgnoreContainers();
6267 }
6268 return Child;
6269 }
6270
6271 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6272 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6273 int32_t &MaxTeamsVal) {
6274
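// E.g. (illustrative): for '#pragma omp target teams num_teams(8)' this
// returns the num_teams expression and sets MinTeamsVal = MaxTeamsVal = 8.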
6275 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6276 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6277 "Expected target-based executable directive.");
6278 switch (DirectiveKind) {
6279 case OMPD_target: {
6280 const auto *CS = D.getInnermostCapturedStmt();
6281 const auto *Body =
6282 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6283 const Stmt *ChildStmt =
6284 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6285 if (const auto *NestedDir =
6286 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6287 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6288 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6289 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6290 ->getNumTeams()
6291 .front();
6292 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6293 if (auto Constant =
6294 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6295 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6296 return NumTeams;
6297 }
6298 MinTeamsVal = MaxTeamsVal = 0;
6299 return nullptr;
6300 }
6301 MinTeamsVal = MaxTeamsVal = 1;
6302 return nullptr;
6303 }
6304 // A value of -1 marks the case where no teams region needs to be emitted.
6305 MinTeamsVal = MaxTeamsVal = -1;
6306 return nullptr;
6307 }
6308 case OMPD_target_teams_loop:
6309 case OMPD_target_teams:
6310 case OMPD_target_teams_distribute:
6311 case OMPD_target_teams_distribute_simd:
6312 case OMPD_target_teams_distribute_parallel_for:
6313 case OMPD_target_teams_distribute_parallel_for_simd: {
6314 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6315 const Expr *NumTeams =
6316 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6317 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6318 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6319 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6320 return NumTeams;
6321 }
6322 MinTeamsVal = MaxTeamsVal = 0;
6323 return nullptr;
6324 }
6325 case OMPD_target_parallel:
6326 case OMPD_target_parallel_for:
6327 case OMPD_target_parallel_for_simd:
6328 case OMPD_target_parallel_loop:
6329 case OMPD_target_simd:
6330 MinTeamsVal = MaxTeamsVal = 1;
6331 return nullptr;
6332 case OMPD_parallel:
6333 case OMPD_for:
6334 case OMPD_parallel_for:
6335 case OMPD_parallel_loop:
6336 case OMPD_parallel_master:
6337 case OMPD_parallel_sections:
6338 case OMPD_for_simd:
6339 case OMPD_parallel_for_simd:
6340 case OMPD_cancel:
6341 case OMPD_cancellation_point:
6342 case OMPD_ordered:
6343 case OMPD_threadprivate:
6344 case OMPD_allocate:
6345 case OMPD_task:
6346 case OMPD_simd:
6347 case OMPD_tile:
6348 case OMPD_unroll:
6349 case OMPD_sections:
6350 case OMPD_section:
6351 case OMPD_single:
6352 case OMPD_master:
6353 case OMPD_critical:
6354 case OMPD_taskyield:
6355 case OMPD_barrier:
6356 case OMPD_taskwait:
6357 case OMPD_taskgroup:
6358 case OMPD_atomic:
6359 case OMPD_flush:
6360 case OMPD_depobj:
6361 case OMPD_scan:
6362 case OMPD_teams:
6363 case OMPD_target_data:
6364 case OMPD_target_exit_data:
6365 case OMPD_target_enter_data:
6366 case OMPD_distribute:
6367 case OMPD_distribute_simd:
6368 case OMPD_distribute_parallel_for:
6369 case OMPD_distribute_parallel_for_simd:
6370 case OMPD_teams_distribute:
6371 case OMPD_teams_distribute_simd:
6372 case OMPD_teams_distribute_parallel_for:
6373 case OMPD_teams_distribute_parallel_for_simd:
6374 case OMPD_target_update:
6375 case OMPD_declare_simd:
6376 case OMPD_declare_variant:
6377 case OMPD_begin_declare_variant:
6378 case OMPD_end_declare_variant:
6379 case OMPD_declare_target:
6380 case OMPD_end_declare_target:
6381 case OMPD_declare_reduction:
6382 case OMPD_declare_mapper:
6383 case OMPD_taskloop:
6384 case OMPD_taskloop_simd:
6385 case OMPD_master_taskloop:
6386 case OMPD_master_taskloop_simd:
6387 case OMPD_parallel_master_taskloop:
6388 case OMPD_parallel_master_taskloop_simd:
6389 case OMPD_requires:
6390 case OMPD_metadirective:
6391 case OMPD_unknown:
6392 break;
6393 default:
6394 break;
6395 }
6396 llvm_unreachable("Unexpected directive kind.");
6397 }
6398
6399 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6400 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6401 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6402 "Clauses associated with the teams directive expected to be emitted "
6403 "only for the host!");
6404 CGBuilderTy &Bld = CGF.Builder;
6405 int32_t MinNT = -1, MaxNT = -1;
6406 const Expr *NumTeams =
6407 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6408 if (NumTeams != nullptr) {
6409 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6410
6411 switch (DirectiveKind) {
6412 case OMPD_target: {
6413 const auto *CS = D.getInnermostCapturedStmt();
6414 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6415 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6416 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6417 /*IgnoreResultAssign*/ true);
6418 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6419 /*isSigned=*/true);
6420 }
6421 case OMPD_target_teams:
6422 case OMPD_target_teams_distribute:
6423 case OMPD_target_teams_distribute_simd:
6424 case OMPD_target_teams_distribute_parallel_for:
6425 case OMPD_target_teams_distribute_parallel_for_simd: {
6426 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6427 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6428 /*IgnoreResultAssign*/ true);
6429 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6430 /*isSigned=*/true);
6431 }
6432 default:
6433 break;
6434 }
6435 }
6436
6437 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6438 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6439 }
6440
6441 /// Check for a num threads constant value (stored in \p UpperBound), or
6442 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6443 /// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
6444 /// nullptr, no expression evaluation is performed.
6445 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6446 const Expr **E, int32_t &UpperBound,
6447 bool UpperBoundOnly, llvm::Value **CondVal) {
6448 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6449 CGF.getContext(), CS->getCapturedStmt());
6450 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6451 if (!Dir)
6452 return;
6453
6454 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6455 // Handle the if clause. If an if clause is present, the number of threads is
6456 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
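// E.g. (illustrative): '#pragma omp parallel if(c) num_threads(4)' yields an
// upper bound of 'c ? 4 : 1'.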
6457 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6458 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6459 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6460 const OMPIfClause *IfClause = nullptr;
6461 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6462 if (C->getNameModifier() == OMPD_unknown ||
6463 C->getNameModifier() == OMPD_parallel) {
6464 IfClause = C;
6465 break;
6466 }
6467 }
6468 if (IfClause) {
6469 const Expr *CondExpr = IfClause->getCondition();
6470 bool Result;
6471 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6472 if (!Result) {
6473 UpperBound = 1;
6474 return;
6475 }
6476 } else {
6477 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6478 if (const auto *PreInit =
6479 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6480 for (const auto *I : PreInit->decls()) {
6481 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6482 CGF.EmitVarDecl(cast<VarDecl>(*I));
6483 } else {
6484 CodeGenFunction::AutoVarEmission Emission =
6485 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6486 CGF.EmitAutoVarCleanups(Emission);
6487 }
6488 }
6489 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6490 }
6491 }
6492 }
6493 }
6494     // Check the value of the num_threads clause iff the if clause was not
6495     // specified or does not evaluate to false.
6496 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6497 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6498 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6499 const auto *NumThreadsClause =
6500 Dir->getSingleClause<OMPNumThreadsClause>();
6501 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6502 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6503 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6504           UpperBound =
6505               UpperBound == -1
6506                   ? static_cast<int32_t>(Constant->getZExtValue())
6507                   : std::min(UpperBound,
6508                              static_cast<int32_t>(Constant->getZExtValue()));
6509       // If we haven't found an upper bound, remember we saw a thread limiting
6510 // clause.
6511 if (UpperBound == -1)
6512 UpperBound = 0;
6513 if (!E)
6514 return;
6515 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6516 if (const auto *PreInit =
6517 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6518 for (const auto *I : PreInit->decls()) {
6519 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6520 CGF.EmitVarDecl(cast<VarDecl>(*I));
6521 } else {
6522 CodeGenFunction::AutoVarEmission Emission =
6523 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6524 CGF.EmitAutoVarCleanups(Emission);
6525 }
6526 }
6527 }
6528 *E = NTExpr;
6529 }
6530 return;
6531 }
6532 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6533 UpperBound = 1;
6534 }
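// Worked example (illustrative; `c` and the literal 4 are hypothetical):
//
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(4)
//
// getNumThreads sets UpperBound to 4 from the constant num_threads clause,
// and, if CondVal is requested and `c` is not a compile-time constant, it
// stores the emitted boolean for `c` in *CondVal. A nested simd construct
// instead forces UpperBound to 1 (sequential execution on the device).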
6535
6536 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6537 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6538 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6539 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6540 "Clauses associated with the teams directive expected to be emitted "
6541 "only for the host!");
6542 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6543 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6544 "Expected target-based executable directive.");
6545
6546 const Expr *NT = nullptr;
6547 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6548
6549 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6550 if (E->isIntegerConstantExpr(CGF.getContext())) {
6551 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6552       UpperBound = UpperBound == -1 ? int32_t(Constant->getZExtValue())
6553                                     : std::min(UpperBound,
6554                                                int32_t(Constant->getZExtValue()));
6555 }
6556     // If we haven't found an upper bound, remember we saw a thread limiting
6557 // clause.
6558 if (UpperBound == -1)
6559 UpperBound = 0;
6560 if (EPtr)
6561 *EPtr = E;
6562 };
6563
6564 auto ReturnSequential = [&]() {
6565 UpperBound = 1;
6566 return NT;
6567 };
6568
6569 switch (DirectiveKind) {
6570 case OMPD_target: {
6571 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6572 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6573 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6574 CGF.getContext(), CS->getCapturedStmt());
6575     // TODO: The standard is not clear on how to resolve two thread limit
6576     // clauses; let's pick the teams one if present, otherwise the target one.
6577 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6578 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6579 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6580 ThreadLimitClause = TLC;
6581 if (ThreadLimitExpr) {
6582 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6583 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6584 CodeGenFunction::LexicalScope Scope(
6585 CGF,
6586 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6587 if (const auto *PreInit =
6588 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6589 for (const auto *I : PreInit->decls()) {
6590 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6591 CGF.EmitVarDecl(cast<VarDecl>(*I));
6592 } else {
6593 CodeGenFunction::AutoVarEmission Emission =
6594 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6595 CGF.EmitAutoVarCleanups(Emission);
6596 }
6597 }
6598 }
6599 }
6600 }
6601 }
6602 if (ThreadLimitClause)
6603 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6604 ThreadLimitExpr);
6605 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6606 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6607 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6608 CS = Dir->getInnermostCapturedStmt();
6609 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6610 CGF.getContext(), CS->getCapturedStmt());
6611 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6612 }
6613 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6614 CS = Dir->getInnermostCapturedStmt();
6615 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6616 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6617 return ReturnSequential();
6618 }
6619 return NT;
6620 }
6621 case OMPD_target_teams: {
6622 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6623 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6624 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6625 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6626 ThreadLimitExpr);
6627 }
6628 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6629 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6630 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6631 CGF.getContext(), CS->getCapturedStmt());
6632 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6633 if (Dir->getDirectiveKind() == OMPD_distribute) {
6634 CS = Dir->getInnermostCapturedStmt();
6635 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6636 }
6637 }
6638 return NT;
6639 }
6640 case OMPD_target_teams_distribute:
6641 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6642 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6643 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6644 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6645 ThreadLimitExpr);
6646 }
6647 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6648 UpperBoundOnly, CondVal);
6649 return NT;
6650 case OMPD_target_teams_loop:
6651 case OMPD_target_parallel_loop:
6652 case OMPD_target_parallel:
6653 case OMPD_target_parallel_for:
6654 case OMPD_target_parallel_for_simd:
6655 case OMPD_target_teams_distribute_parallel_for:
6656 case OMPD_target_teams_distribute_parallel_for_simd: {
6657 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6658 const OMPIfClause *IfClause = nullptr;
6659 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6660 if (C->getNameModifier() == OMPD_unknown ||
6661 C->getNameModifier() == OMPD_parallel) {
6662 IfClause = C;
6663 break;
6664 }
6665 }
6666 if (IfClause) {
6667 const Expr *Cond = IfClause->getCondition();
6668 bool Result;
6669 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6670 if (!Result)
6671 return ReturnSequential();
6672 } else {
6673 CodeGenFunction::RunCleanupsScope Scope(CGF);
6674 *CondVal = CGF.EvaluateExprAsBool(Cond);
6675 }
6676 }
6677 }
6678 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6679 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6680 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6681 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6682 ThreadLimitExpr);
6683 }
6684 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6685 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6686 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6687 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6688 return NumThreadsClause->getNumThreads();
6689 }
6690 return NT;
6691 }
6692 case OMPD_target_teams_distribute_simd:
6693 case OMPD_target_simd:
6694 return ReturnSequential();
6695 default:
6696 break;
6697 }
6698 llvm_unreachable("Unsupported directive kind.");
6699 }
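// Illustration (editorial sketch; clause values are hypothetical):
//
//   #pragma omp target parallel num_threads(4) thread_limit(8)
//
// takes the OMPD_target_parallel path above: CheckForConstExpr first records
// the thread_limit constant (UpperBound becomes 8), then the num_threads
// constant lowers it to min(8, 4) == 4, and the num_threads expression is
// returned for the caller to emit.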
6700
6701 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6702 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6703 llvm::Value *NumThreadsVal = nullptr;
6704 llvm::Value *CondVal = nullptr;
6705 llvm::Value *ThreadLimitVal = nullptr;
6706 const Expr *ThreadLimitExpr = nullptr;
6707 int32_t UpperBound = -1;
6708
6709 const Expr *NT = getNumThreadsExprForTargetDirective(
6710 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6711 &ThreadLimitExpr);
6712
6713 // Thread limit expressions are used below, emit them.
6714 if (ThreadLimitExpr) {
6715 ThreadLimitVal =
6716 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6717 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6718 /*isSigned=*/false);
6719 }
6720
6721   // Generate the num threads expression.
6722 if (UpperBound == 1) {
6723 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6724 } else if (NT) {
6725 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6726 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6727 /*isSigned=*/false);
6728 } else if (ThreadLimitVal) {
6729 // If we do not have a num threads value but a thread limit, replace the
6730     // former with the latter. The thread limit expression was already handled.
6731 NumThreadsVal = ThreadLimitVal;
6732 ThreadLimitVal = nullptr;
6733 } else {
6734 // Default to "0" which means runtime choice.
6735 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6736 NumThreadsVal = CGF.Builder.getInt32(0);
6737 }
6738
6739   // Handle the if clause. If the if clause is present, the number of threads
6740   // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6741 if (CondVal) {
6742 CodeGenFunction::RunCleanupsScope Scope(CGF);
6743 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6744 CGF.Builder.getInt32(1));
6745 }
6746
6747   // If both the thread limit and the num threads expression were present,
6748   // take the minimum.
6749 if (ThreadLimitVal) {
6750 NumThreadsVal = CGF.Builder.CreateSelect(
6751 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6752 ThreadLimitVal, NumThreadsVal);
6753 }
6754
6755 return NumThreadsVal;
6756 }
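// Sketch of the selection logic this function emits (IR names are
// hypothetical):
//
//   %nt  = select i1 %cond, i32 %numthreads, i32 1   ; if-clause handling
//   %lt  = icmp ult i32 %threadlimit, %nt
//   %res = select i1 %lt, i32 %threadlimit, i32 %nt  ; min(thread_limit, nt)
//
// i.e. the result is <cond> ? <numthreads> : 1, clamped by the thread limit
// when both expressions are present.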
6757
6758 namespace {
6759 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6760
6761 // Utility to handle information from clauses associated with a given
6762 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6763 // It provides a convenient interface to obtain the information and generate
6764 // code for that information.
6765 class MappableExprsHandler {
6766 public:
6767 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6768   static unsigned getFlagMemberOffset() {
6769 unsigned Offset = 0;
6770 for (uint64_t Remain =
6771 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6772 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6773 !(Remain & 1); Remain = Remain >> 1)
6774 Offset++;
6775 return Offset;
6776 }
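// For example, assuming the upstream definition OMP_MAP_MEMBER_OF ==
// 0xffff'0000'0000'0000 (the high 16 bits of the 64-bit flag word),
// getFlagMemberOffset() returns 48, and callers encode MEMBER_OF(n) as
// (n << 48).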
6777
6778 /// Class that holds debugging information for a data mapping to be passed to
6779 /// the runtime library.
6780 class MappingExprInfo {
6781 /// The variable declaration used for the data mapping.
6782 const ValueDecl *MapDecl = nullptr;
6783 /// The original expression used in the map clause, or null if there is
6784 /// none.
6785 const Expr *MapExpr = nullptr;
6786
6787 public:
6788     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6789 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6790
6791     const ValueDecl *getMapDecl() const { return MapDecl; }
6792     const Expr *getMapExpr() const { return MapExpr; }
6793 };
6794
6795 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6796 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6797 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6798 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6799 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6800 using MapNonContiguousArrayTy =
6801 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6802 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6803 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6804 using MapData =
6805 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
6806 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
6807 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
6808 using MapDataArrayTy = SmallVector<MapData, 4>;
6809
6810 /// This structure contains combined information generated for mappable
6811 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6812 /// mappers, and non-contiguous information.
6813 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6814 MapExprsArrayTy Exprs;
6815 MapValueDeclsArrayTy Mappers;
6816 MapValueDeclsArrayTy DevicePtrDecls;
6817
6818 /// Append arrays in \a CurInfo.
6819     void append(MapCombinedInfoTy &CurInfo) {
6820 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6821 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6822 CurInfo.DevicePtrDecls.end());
6823 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6824 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6825 }
6826 };
6827
6828   /// Map between a struct and its lowest & highest elements which have been
6829 /// mapped.
6830 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6831 /// HE(FieldIndex, Pointer)}
6832 struct StructRangeInfoTy {
6833 MapCombinedInfoTy PreliminaryMapData;
6834 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6835 0, Address::invalid()};
6836 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6837 0, Address::invalid()};
6838 Address Base = Address::invalid();
6839 Address LB = Address::invalid();
6840 bool IsArraySection = false;
6841 bool HasCompleteRecord = false;
6842 };
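// Illustrative use (hypothetical struct and clause): for
//
//   struct T { int a; double b; float c; } t;
//   #pragma omp target map(t.a, t.c)
//
// LowestElem would record field index 0 with &t.a and HighestElem field
// index 2 with &t.c, so the combined entry can allocate the whole span
// between them on the device, per the mapping examples documented below.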
6843
6844 private:
6845 /// Kind that defines how a device pointer has to be returned.
6846 struct MapInfo {
6847 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6848 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6849 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6850 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6851 bool ReturnDevicePointer = false;
6852 bool IsImplicit = false;
6853 const ValueDecl *Mapper = nullptr;
6854 const Expr *VarRef = nullptr;
6855 bool ForDeviceAddr = false;
6856
6857 MapInfo() = default;
6858     MapInfo(
6859 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6860 OpenMPMapClauseKind MapType,
6861 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6862 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6863 bool ReturnDevicePointer, bool IsImplicit,
6864 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6865 bool ForDeviceAddr = false)
6866 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6867 MotionModifiers(MotionModifiers),
6868 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6869 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6870 };
6871
6872 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6873 /// member and there is no map information about it, then emission of that
6874 /// entry is deferred until the whole struct has been processed.
6875 struct DeferredDevicePtrEntryTy {
6876 const Expr *IE = nullptr;
6877 const ValueDecl *VD = nullptr;
6878 bool ForDeviceAddr = false;
6879
6880     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6881 bool ForDeviceAddr)
6882 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6883 };
6884
6885 /// The target directive from where the mappable clauses were extracted. It
6886   /// is either an executable directive or a user-defined mapper directive.
6887 llvm::PointerUnion<const OMPExecutableDirective *,
6888 const OMPDeclareMapperDecl *>
6889 CurDir;
6890
6891 /// Function the directive is being generated for.
6892 CodeGenFunction &CGF;
6893
6894 /// Set of all first private variables in the current directive.
6895 /// bool data is set to true if the variable is implicitly marked as
6896 /// firstprivate, false otherwise.
6897 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6898
6899 /// Map between device pointer declarations and their expression components.
6900 /// The key value for declarations in 'this' is null.
6901 llvm::DenseMap<
6902 const ValueDecl *,
6903 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6904 DevPointersMap;
6905
6906 /// Map between device addr declarations and their expression components.
6907 /// The key value for declarations in 'this' is null.
6908 llvm::DenseMap<
6909 const ValueDecl *,
6910 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6911 HasDevAddrsMap;
6912
6913 /// Map between lambda declarations and their map type.
6914 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6915
6916   llvm::Value *getExprTypeSize(const Expr *E) const {
6917 QualType ExprTy = E->getType().getCanonicalType();
6918
6919 // Calculate the size for array shaping expression.
6920 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6921 llvm::Value *Size =
6922 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6923 for (const Expr *SE : OAE->getDimensions()) {
6924 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6925 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6926 CGF.getContext().getSizeType(),
6927 SE->getExprLoc());
6928 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6929 }
6930 return Size;
6931 }
6932
6933 // Reference types are ignored for mapping purposes.
6934 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6935 ExprTy = RefTy->getPointeeType().getCanonicalType();
6936
6937 // Given that an array section is considered a built-in type, we need to
6938 // do the calculation based on the length of the section instead of relying
6939 // on CGF.getTypeSize(E->getType()).
6940 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6941 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6942 OAE->getBase()->IgnoreParenImpCasts())
6943 .getCanonicalType();
6944
6945       // If there is no length associated with the expression and the lower
6946       // bound is not specified either, we are using the whole length of the
6947       // base.
6948 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6949 !OAE->getLowerBound())
6950 return CGF.getTypeSize(BaseTy);
6951
6952 llvm::Value *ElemSize;
6953 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6954 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6955 } else {
6956 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6957 assert(ATy && "Expecting array type if not a pointer type.");
6958 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6959 }
6960
6961 // If we don't have a length at this point, that is because we have an
6962 // array section with a single element.
6963 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6964 return ElemSize;
6965
6966 if (const Expr *LenExpr = OAE->getLength()) {
6967 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6968 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6969 CGF.getContext().getSizeType(),
6970 LenExpr->getExprLoc());
6971 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6972 }
6973 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6974 OAE->getLowerBound() && "expected array_section[lb:].");
6975 // Size = sizetype - lb * elemtype;
6976 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6977 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6978 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6979 CGF.getContext().getSizeType(),
6980 OAE->getLowerBound()->getExprLoc());
6981 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6982 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6983 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6984 LengthVal = CGF.Builder.CreateSelect(
6985 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6986 return LengthVal;
6987 }
6988 return CGF.getTypeSize(ExprTy);
6989 }
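// Worked examples (illustrative; `i` and `lb` are hypothetical): given
// `int i[100];`, getExprTypeSize on the section i[1:23] emits
// 23 * sizeof(int), while on i[lb:] it emits
// max(sizeof(i) - lb * sizeof(int), 0) via the compare-and-select sequence
// above.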
6990
6991 /// Return the corresponding bits for a given map clause modifier. Add
6992 /// a flag marking the map as a pointer if requested. Add a flag marking the
6993 /// map as the first one of a series of maps that relate to the same map
6994 /// expression.
6995   OpenMPOffloadMappingFlags getMapTypeBits(
6996 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6997 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6998 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6999 OpenMPOffloadMappingFlags Bits =
7000 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7001 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7002 switch (MapType) {
7003 case OMPC_MAP_alloc:
7004 case OMPC_MAP_release:
7005       // alloc and release are the default behavior in the runtime library, i.e.
7006       // if we don't pass any bits, alloc/release is what the runtime is going
7007       // to do. Therefore, we don't need to signal anything for these two map
7008       // types.
7009 break;
7010 case OMPC_MAP_to:
7011 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7012 break;
7013 case OMPC_MAP_from:
7014 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7015 break;
7016 case OMPC_MAP_tofrom:
7017 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7018 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7019 break;
7020 case OMPC_MAP_delete:
7021 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7022 break;
7023 case OMPC_MAP_unknown:
7024 llvm_unreachable("Unexpected map type!");
7025 }
7026 if (AddPtrFlag)
7027 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7028 if (AddIsTargetParamFlag)
7029 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7030 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7031 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7032 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7033 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7034 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7035 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7036 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7037 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7038 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7039 if (IsNonContiguous)
7040 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7041 return Bits;
7042 }
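// Example (sketch; `x` is a hypothetical mapped variable): for
// `map(always, close, tofrom: x)` this returns OMP_MAP_TO | OMP_MAP_FROM |
// OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with OMP_MAP_TARGET_PARAM added when
// AddIsTargetParamFlag is set.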
7043
7044 /// Return true if the provided expression is a final array section. A
7045   /// final array section is one whose length can't be proved to be one.
7046   bool isFinalArraySectionExpression(const Expr *E) const {
7047 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7048
7049 // It is not an array section and therefore not a unity-size one.
7050 if (!OASE)
7051 return false;
7052
7053     // An array section with no colon always refers to a single element.
7054 if (OASE->getColonLocFirst().isInvalid())
7055 return false;
7056
7057 const Expr *Length = OASE->getLength();
7058
7059 // If we don't have a length we have to check if the array has size 1
7060 // for this dimension. Also, we should always expect a length if the
7061     // base type is a pointer.
7062 if (!Length) {
7063 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7064 OASE->getBase()->IgnoreParenImpCasts())
7065 .getCanonicalType();
7066 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7067 return ATy->getSExtSize() != 1;
7068 // If we don't have a constant dimension length, we have to consider
7069 // the current section as having any size, so it is not necessarily
7070       // unitary. If it happens to be unity size, that's the user's fault.
7071 return true;
7072 }
7073
7074 // Check if the length evaluates to 1.
7075 Expr::EvalResult Result;
7076 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7077       return true; // Can have a size greater than one.
7078
7079 llvm::APSInt ConstLength = Result.Val.getInt();
7080 return ConstLength.getSExtValue() != 1;
7081 }
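// Examples (illustrative; `a` and `n` are hypothetical): a[3] is not an
// array section, so it is not final; a[0:1] has a provable length of one,
// so it is not final; a[0:n] with a runtime `n` cannot be proved to have
// length one, so it is final.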
7082
7083 /// Generate the base pointers, section pointers, sizes, map type bits, and
7084 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7085 /// map type, map or motion modifiers, and expression components.
7086 /// \a IsFirstComponent should be set to true if the provided set of
7087 /// components is the first associated with a capture.
7088   void generateInfoForComponentList(
7089 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7090 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7091 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7092 MapCombinedInfoTy &CombinedInfo,
7093 MapCombinedInfoTy &StructBaseCombinedInfo,
7094 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7095 bool IsImplicit, bool GenerateAllInfoForClauses,
7096 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7097 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7098 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7099 OverlappedElements = {},
7100 bool AreBothBasePtrAndPteeMapped = false) const {
7101 // The following summarizes what has to be generated for each map and the
7102 // types below. The generated information is expressed in this order:
7103 // base pointer, section pointer, size, flags
7104 // (to add to the ones that come from the map type and modifier).
7105 //
7106 // double d;
7107 // int i[100];
7108 // float *p;
7109 // int **a = &i;
7110 //
7111 // struct S1 {
7112 // int i;
7113 // float f[50];
7114 // }
7115 // struct S2 {
7116 // int i;
7117 // float f[50];
7118 // S1 s;
7119 // double *p;
7120 // struct S2 *ps;
7121 // int &ref;
7122 // }
7123 // S2 s;
7124 // S2 *ps;
7125 //
7126 // map(d)
7127 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7128 //
7129 // map(i)
7130 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7131 //
7132 // map(i[1:23])
7133 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7134 //
7135 // map(p)
7136 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7137 //
7138 // map(p[1:24])
7139 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7140 // in unified shared memory mode or for local pointers
7141 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7142 //
7143 // map((*a)[0:3])
7144 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7145 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7146 //
7147 // map(**a)
7148 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7149 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7150 //
7151 // map(s)
7152 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7153 //
7154 // map(s.i)
7155 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7156 //
7157 // map(s.s.f)
7158 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7159 //
7160 // map(s.p)
7161 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7162 //
7163 // map(to: s.p[:22])
7164 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7165 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7166 // &(s.p), &(s.p[0]), 22*sizeof(double),
7167 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7168 // (*) alloc space for struct members, only this is a target parameter
7169 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7170 // optimizes this entry out, same in the examples below)
7171 // (***) map the pointee (map: to)
7172 //
7173 // map(to: s.ref)
7174 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7175 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7176 // (*) alloc space for struct members, only this is a target parameter
7177 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7178 // optimizes this entry out, same in the examples below)
7179 // (***) map the pointee (map: to)
7180 //
7181 // map(s.ps)
7182 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7183 //
7184 // map(from: s.ps->s.i)
7185 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7186 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7187 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7188 //
7189 // map(to: s.ps->ps)
7190 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7191 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7192 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7193 //
7194 // map(s.ps->ps->ps)
7195 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7196 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7197 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7198 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7199 //
7200 // map(to: s.ps->ps->s.f[:22])
7201 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7202 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7203 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7204 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7205 //
7206 // map(ps)
7207 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7208 //
7209 // map(ps->i)
7210 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7211 //
7212 // map(ps->s.f)
7213 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7214 //
7215 // map(from: ps->p)
7216 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7217 //
7218 // map(to: ps->p[:22])
7219 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7220 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7221 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7222 //
7223 // map(ps->ps)
7224 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7225 //
7226 // map(from: ps->ps->s.i)
7227 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7228 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7229 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7230 //
7231 // map(from: ps->ps->ps)
7232 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7233 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7234 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7235 //
7236 // map(ps->ps->ps->ps)
7237 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7238 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7239 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7240 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7241 //
7242 // map(to: ps->ps->ps->s.f[:22])
7243 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7244 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7245 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7247 //
7248 // map(to: s.f[:22]) map(from: s.p[:33])
7249 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7250 // sizeof(double*) (**), TARGET_PARAM
7251 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7252 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7253 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254 // (*) allocate contiguous space needed to fit all mapped members even if
7255 // we allocate space for members not mapped (in this example,
7256 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7257 // them as well because they fall between &s.f[0] and &s.p)
7258 //
7259 // map(from: s.f[:22]) map(to: ps->p[:33])
7260 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7261 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7262 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7263 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7264 // (*) the struct this entry pertains to is the 2nd element in the list of
7265 // arguments, hence MEMBER_OF(2)
7266 //
7267 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7268 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7269 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7270 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7271 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7272 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7273 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7274 // (*) the struct this entry pertains to is the 4th element in the list
7275 // of arguments, hence MEMBER_OF(4)
7276 //
7277 // map(p, p[:100])
7278 // ===> map(p[:100])
7279 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7280
7281 // Track if the map information being generated is the first for a capture.
7282 bool IsCaptureFirstInfo = IsFirstComponentList;
7283 // When the variable is on a declare target link or in a to clause with
7284 // unified memory, a reference is needed to hold the host/device address
7285 // of the variable.
7286 bool RequiresReference = false;
7287
7288 // Scan the components from the base to the complete expression.
7289 auto CI = Components.rbegin();
7290 auto CE = Components.rend();
7291 auto I = CI;
7292
7293 // Track if the map information being generated is the first for a list of
7294 // components.
7295 bool IsExpressionFirstInfo = true;
7296 bool FirstPointerInComplexData = false;
7297 Address BP = Address::invalid();
7298 const Expr *AssocExpr = I->getAssociatedExpression();
7299 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7300 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7301 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7302
7303 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7304 return;
7305 if (isa<MemberExpr>(AssocExpr)) {
7306 // The base is the 'this' pointer. The content of the pointer is going
7307 // to be the base of the field being mapped.
7308 BP = CGF.LoadCXXThisAddress();
7309 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7310 (OASE &&
7311 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7312 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7313 } else if (OAShE &&
7314 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7315 BP = Address(
7316 CGF.EmitScalarExpr(OAShE->getBase()),
7317 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7318 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7319 } else {
7320 // The base is the reference to the variable.
7321 // BP = &Var.
7322 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7323 if (const auto *VD =
7324 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7325 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7326 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7327 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7328 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7329 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7330 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7331 RequiresReference = true;
7332 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7333 }
7334 }
7335 }
7336
7337 // If the variable is a pointer and is being dereferenced (i.e. is not
7338 // the last component), the base has to be the pointer itself, not its
7339 // reference. References are ignored for mapping purposes.
7340 QualType Ty =
7341 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7342 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7343 // No need to generate individual map information for the pointer, it
7344 // can be associated with the combined storage if shared memory mode is
7345         // active or the base declaration is not a global variable.
7346 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7347 if (!AreBothBasePtrAndPteeMapped &&
7348 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7349 !VD || VD->hasLocalStorage()))
7350 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7351 else
7352 FirstPointerInComplexData = true;
7353 ++I;
7354 }
7355 }
7356
7357 // Track whether a component of the list should be marked as MEMBER_OF some
7358 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7359     // in a component list should be marked as MEMBER_OF; all subsequent
7360     // entries do not belong to the base struct. E.g.
7361 // struct S2 s;
7362 // s.ps->ps->ps->f[:]
7363 // (1) (2) (3) (4)
7364 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7365 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7366 // is the pointee of ps(2) which is not member of struct s, so it should not
7367 // be marked as such (it is still PTR_AND_OBJ).
7368 // The variable is initialized to false so that PTR_AND_OBJ entries which
7369 // are not struct members are not considered (e.g. array of pointers to
7370 // data).
7371 bool ShouldBeMemberOf = false;
7372
7373 // Variable keeping track of whether or not we have encountered a component
7374 // in the component list which is a member expression. Useful when we have a
7375 // pointer or a final array section, in which case it is the previous
7376 // component in the list which tells us whether we have a member expression.
7377 // E.g. X.f[:]
7378 // While processing the final array section "[:]" it is "f" which tells us
7379 // whether we are dealing with a member of a declared struct.
7380 const MemberExpr *EncounteredME = nullptr;
7381
7382     // Track the total number of dimensions. Start from one for the dummy
7383 // dimension.
7384 uint64_t DimSize = 1;
7385
7386 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7387 bool IsPrevMemberReference = false;
7388
7389 bool IsPartialMapped =
7390 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7391
7392 // We need to check if we will be encountering any MEs. If we do not
7393 // encounter any ME expression it means we will be mapping the whole struct.
7394 // In that case we need to skip adding an entry for the struct to the
7395 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7396 // list only when generating all info for clauses.
7397 bool IsMappingWholeStruct = true;
7398 if (!GenerateAllInfoForClauses) {
7399 IsMappingWholeStruct = false;
7400 } else {
7401 for (auto TempI = I; TempI != CE; ++TempI) {
7402 const MemberExpr *PossibleME =
7403 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7404 if (PossibleME) {
7405 IsMappingWholeStruct = false;
7406 break;
7407 }
7408 }
7409 }
7410
7411 for (; I != CE; ++I) {
7412 // If the current component is member of a struct (parent struct) mark it.
7413 if (!EncounteredME) {
7414 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7415 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7416 // as MEMBER_OF the parent struct.
7417 if (EncounteredME) {
7418 ShouldBeMemberOf = true;
7419           // Do not emit as a complex pointer if this is actually not an
7420           // array-like expression.
7421 if (FirstPointerInComplexData) {
7422 QualType Ty = std::prev(I)
7423 ->getAssociatedDeclaration()
7424 ->getType()
7425 .getNonReferenceType();
7426 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7427 FirstPointerInComplexData = false;
7428 }
7429 }
7430 }
7431
7432 auto Next = std::next(I);
7433
7434 // We need to generate the addresses and sizes if this is the last
7435 // component, if the component is a pointer or if it is an array section
7436 // whose length can't be proved to be one. If this is a pointer, it
7437 // becomes the base address for the following components.
7438
7439       // A final array section is one whose length can't be proved to be one.
7440 // If the map item is non-contiguous then we don't treat any array section
7441 // as final array section.
7442 bool IsFinalArraySection =
7443 !IsNonContiguous &&
7444 isFinalArraySectionExpression(I->getAssociatedExpression());
7445
7446 // If we have a declaration for the mapping use that, otherwise use
7447 // the base declaration of the map clause.
7448 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7449 ? I->getAssociatedDeclaration()
7450 : BaseDecl;
7451 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7452 : MapExpr;
7453
7454 // Get information on whether the element is a pointer. Have to do a
7455 // special treatment for array sections given that they are built-in
7456 // types.
7457 const auto *OASE =
7458 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7459 const auto *OAShE =
7460 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7461 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7462 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7463 bool IsPointer =
7464 OAShE ||
7465 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7466 .getCanonicalType()
7467 ->isAnyPointerType()) ||
7468 I->getAssociatedExpression()->getType()->isAnyPointerType();
7469 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7470 MapDecl &&
7471 MapDecl->getType()->isLValueReferenceType();
7472 bool IsNonDerefPointer = IsPointer &&
7473 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7474 !IsNonContiguous;
7475
7476 if (OASE)
7477 ++DimSize;
7478
7479 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7480 IsFinalArraySection) {
7481 // If this is not the last component, we expect the pointer to be
7482 // associated with an array expression or member expression.
7483 assert((Next == CE ||
7484 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7485 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7486 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7487 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7488 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7489 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7490 "Unexpected expression");
7491
7492 Address LB = Address::invalid();
7493 Address LowestElem = Address::invalid();
7494 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7495                                        const MemberExpr *E) {
7496 const Expr *BaseExpr = E->getBase();
7497 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7498 // scalar.
7499 LValue BaseLV;
7500 if (E->isArrow()) {
7501 LValueBaseInfo BaseInfo;
7502 TBAAAccessInfo TBAAInfo;
7503 Address Addr =
7504 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7505 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7506 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7507 } else {
7508 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7509 }
7510 return BaseLV;
7511 };
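// For illustration: given a member expression `ps->f`, the lambda above
// emits `ps` as a scalar pointer load; for `s.f` it emits `s` as an
// lvalue.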
7512 if (OAShE) {
7513 LowestElem = LB =
7514 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7515 CGF.ConvertTypeForMem(
7516 OAShE->getBase()->getType()->getPointeeType()),
7517 CGF.getContext().getTypeAlignInChars(
7518 OAShE->getBase()->getType()));
7519 } else if (IsMemberReference) {
7520 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7521 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7522 LowestElem = CGF.EmitLValueForFieldInitialization(
7523 BaseLVal, cast<FieldDecl>(MapDecl))
7524 .getAddress();
7525 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7526 .getAddress();
7527 } else {
7528 LowestElem = LB =
7529 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7530 .getAddress();
7531 }
7532
7533 // If this component is a pointer inside the base struct then we don't
7534 // need to create any entry for it - it will be combined with the object
7535 // it is pointing to into a single PTR_AND_OBJ entry.
7536 bool IsMemberPointerOrAddr =
7537 EncounteredME &&
7538 (((IsPointer || ForDeviceAddr) &&
7539 I->getAssociatedExpression() == EncounteredME) ||
7540 (IsPrevMemberReference && !IsPointer) ||
7541 (IsMemberReference && Next != CE &&
7542 !Next->getAssociatedExpression()->getType()->isPointerType()));
7543 if (!OverlappedElements.empty() && Next == CE) {
7544 // Handle base element with the info for overlapped elements.
7545 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7546 assert(!IsPointer &&
7547 "Unexpected base element with the pointer type.");
7548 // Mark the whole struct as the struct that requires allocation on the
7549 // device.
7550 PartialStruct.LowestElem = {0, LowestElem};
7551 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7552 I->getAssociatedExpression()->getType());
7553 Address HB = CGF.Builder.CreateConstGEP(
7554 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7555 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7556 TypeSize.getQuantity() - 1);
7557 PartialStruct.HighestElem = {
7558 std::numeric_limits<decltype(
7559 PartialStruct.HighestElem.first)>::max(),
7560 HB};
7561 PartialStruct.Base = BP;
7562 PartialStruct.LB = LB;
7563 assert(
7564 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7565 "Overlapped elements must be used only once for the variable.");
7566 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7567 // Emit data for non-overlapped data.
7568 OpenMPOffloadMappingFlags Flags =
7569 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7570 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7571 /*AddPtrFlag=*/false,
7572 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7573 llvm::Value *Size = nullptr;
7574 // Do bitcopy of all non-overlapped structure elements.
7575 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7576 Component : OverlappedElements) {
7577 Address ComponentLB = Address::invalid();
7578 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7579 Component) {
7580 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7581 const auto *FD = dyn_cast<FieldDecl>(VD);
7582 if (FD && FD->getType()->isLValueReferenceType()) {
7583 const auto *ME =
7584 cast<MemberExpr>(MC.getAssociatedExpression());
7585 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7586 ComponentLB =
7587 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7588 .getAddress();
7589 } else {
7590 ComponentLB =
7591 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7592 .getAddress();
7593 }
7594 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7595 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7596 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7597 LBPtr);
7598 break;
7599 }
7600 }
7601 assert(Size && "Failed to determine structure size");
7602 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7603 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7604 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7605 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7606 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7607 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7608 Size, CGF.Int64Ty, /*isSigned=*/true));
7609 CombinedInfo.Types.push_back(Flags);
7610 CombinedInfo.Mappers.push_back(nullptr);
7611 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7612 : 1);
7613 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7614 }
7615 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7616 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7617 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7618 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7619 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7620 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7621 Size = CGF.Builder.CreatePtrDiff(
7622 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7623 LBPtr);
7624 CombinedInfo.Sizes.push_back(
7625 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7626 CombinedInfo.Types.push_back(Flags);
7627 CombinedInfo.Mappers.push_back(nullptr);
7628 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629 : 1);
7630 break;
7631 }
7632 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7633         // Skip adding an entry to CurInfo for this combined entry if the
7634         // whole struct is currently being mapped. The struct needs to be added
7635         // in the first position, before any data internal to the struct is
7636         // mapped.
7637         // Also skip adding an entry if the base pointers in
7638         // PartialStruct.PreliminaryMapData have already been mapped.
7639 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7640 (Next == CE && MapType != OMPC_MAP_unknown)) {
7641 if (!IsMappingWholeStruct) {
7642 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7643 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7644 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7645 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7646 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7647 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7648 Size, CGF.Int64Ty, /*isSigned=*/true));
7649 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7650 : 1);
7651 } else {
7652 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7653 StructBaseCombinedInfo.BasePointers.push_back(
7654 BP.emitRawPointer(CGF));
7655 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7656 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7657 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7658 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7659 Size, CGF.Int64Ty, /*isSigned=*/true));
7660 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7661 IsNonContiguous ? DimSize : 1);
7662 }
7663
7664 // If Mapper is valid, the last component inherits the mapper.
7665 bool HasMapper = Mapper && Next == CE;
7666 if (!IsMappingWholeStruct)
7667 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7668 else
7669 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7670 : nullptr);
7671
7672 // We need to add a pointer flag for each map that comes from the
7673 // same expression except for the first one. We also need to signal
7674 // this map is the first one that relates with the current capture
7675 // (there is a set of entries for each capture).
7676 OpenMPOffloadMappingFlags Flags =
7677 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7678 !IsExpressionFirstInfo || RequiresReference ||
7679 FirstPointerInComplexData || IsMemberReference,
7680 AreBothBasePtrAndPteeMapped ||
7681 (IsCaptureFirstInfo && !RequiresReference),
7682 IsNonContiguous);
7683
7684 if (!IsExpressionFirstInfo || IsMemberReference) {
7685 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7686 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7687 if (IsPointer || (IsMemberReference && Next != CE))
7688 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7689 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7690 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7691 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7692 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7693
7694 if (ShouldBeMemberOf) {
7695 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696 // should be later updated with the correct value of MEMBER_OF.
7697 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7698 // From now on, all subsequent PTR_AND_OBJ entries should not be
7699 // marked as MEMBER_OF.
7700 ShouldBeMemberOf = false;
7701 }
7702 }
7703
7704 if (!IsMappingWholeStruct)
7705 CombinedInfo.Types.push_back(Flags);
7706 else
7707 StructBaseCombinedInfo.Types.push_back(Flags);
7708 }
7709
7710 // If we have encountered a member expression so far, keep track of the
7711 // mapped member. If the parent is "*this", then the value declaration
7712 // is nullptr.
7713 if (EncounteredME) {
7714 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7715 unsigned FieldIndex = FD->getFieldIndex();
7716
7717 // Update info about the lowest and highest elements for this struct
7718 if (!PartialStruct.Base.isValid()) {
7719 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7720 if (IsFinalArraySection && OASE) {
7721 Address HB =
7722 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7723 .getAddress();
7724 PartialStruct.HighestElem = {FieldIndex, HB};
7725 } else {
7726 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7727 }
7728 PartialStruct.Base = BP;
7729 PartialStruct.LB = BP;
7730 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7731 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7732 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7733 if (IsFinalArraySection && OASE) {
7734 Address HB =
7735 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7736 .getAddress();
7737 PartialStruct.HighestElem = {FieldIndex, HB};
7738 } else {
7739 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7740 }
7741 }
7742 }
7743
7744 // Need to emit combined struct for array sections.
7745 if (IsFinalArraySection || IsNonContiguous)
7746 PartialStruct.IsArraySection = true;
7747
7748 // If we have a final array section, we are done with this expression.
7749 if (IsFinalArraySection)
7750 break;
7751
7752 // The pointer becomes the base for the next element.
7753 if (Next != CE)
7754 BP = IsMemberReference ? LowestElem : LB;
7755 if (!IsPartialMapped)
7756 IsExpressionFirstInfo = false;
7757 IsCaptureFirstInfo = false;
7758 FirstPointerInComplexData = false;
7759 IsPrevMemberReference = IsMemberReference;
7760 } else if (FirstPointerInComplexData) {
7761 QualType Ty = Components.rbegin()
7762 ->getAssociatedDeclaration()
7763 ->getType()
7764 .getNonReferenceType();
7765 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7766 FirstPointerInComplexData = false;
7767 }
7768 }
7769     // If we ran into the whole component, allocate space for the whole
7770     // record.
7771 if (!EncounteredME)
7772 PartialStruct.HasCompleteRecord = true;
7773
7774 if (!IsNonContiguous)
7775 return;
7776
7777 const ASTContext &Context = CGF.getContext();
7778
7779     // To support strides in array sections, we need to initialize the first
7780     // dimension size as 1, the first offset as 0, and the first count as 1.
7781 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7782 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7783 MapValuesArrayTy CurStrides;
7784 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7785 uint64_t ElementTypeSize;
7786
7787 // Collect size information for each dimension and get the element size as
7788 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7789 // should be [10, 10] and the first stride is 4 bytes.
7790 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7791 Components) {
7792 const Expr *AssocExpr = Component.getAssociatedExpression();
7793 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7794
7795 if (!OASE)
7796 continue;
7797
7798 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7799 auto *CAT = Context.getAsConstantArrayType(Ty);
7800 auto *VAT = Context.getAsVariableArrayType(Ty);
7801
7802 // We need all the dimension sizes except for the last dimension.
7803 assert((VAT || CAT || &Component == &*Components.begin()) &&
7804 "Should be either ConstantArray or VariableArray if not the "
7805 "first Component");
7806
7807 // Get element size if CurStrides is empty.
7808 if (CurStrides.empty()) {
7809 const Type *ElementType = nullptr;
7810 if (CAT)
7811 ElementType = CAT->getElementType().getTypePtr();
7812 else if (VAT)
7813 ElementType = VAT->getElementType().getTypePtr();
7814 else
7815 assert(&Component == &*Components.begin() &&
7816 "Only expect pointer (non CAT or VAT) when this is the "
7817 "first Component");
7818 // If ElementType is null, then it means the base is a pointer
7819 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7820 // on the next iteration.
7821 if (ElementType) {
7822 // For the case of a pointer as base, we need to remove one
7823 // level of indirection.
7824 if (&Component != &*Components.begin())
7825 ElementType = ElementType->getPointeeOrArrayElementType();
7826 ElementTypeSize =
7827 Context.getTypeSizeInChars(ElementType).getQuantity();
7828 CurStrides.push_back(
7829 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7830 }
7831 }
7832 // Get the dimension size, except for the last dimension, since we
7833 // don't need it.
7834 if (DimSizes.size() < Components.size() - 1) {
7835 if (CAT)
7836 DimSizes.push_back(
7837 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7838 else if (VAT)
7839 DimSizes.push_back(CGF.Builder.CreateIntCast(
7840 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7841 /*IsSigned=*/false));
7842 }
7843 }
7844
7845 // Skip the dummy dimension since we already have its information.
7846 auto *DI = DimSizes.begin() + 1;
7847 // Running product of dimension sizes.
7848 llvm::Value *DimProd =
7849 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7850
7851 // Collect info for non-contiguous maps. Note that offsets, counts, and
7852 // strides are only meaningful for array sections, so we insert
7853 // placeholder values for anything other than an array section.
7854 // Also, the offsets, counts, and strides lists are not the same size as
7855 // pointers, base_pointers, sizes, or dims. Instead, their size matches
7856 // the number of non-contiguous declarations in the target update
7857 // to/from clause.
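// Illustrative example (assumed): for 'int arr[5][5]' and
// '#pragma omp target update to(arr[0:2:2][1:3])', one (offset, count,
// stride) triple is recorded per dimension, on top of the dummy innermost
// dimension initialized above.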
7858 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7859 Components) {
7860 const Expr *AssocExpr = Component.getAssociatedExpression();
7861
7862 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7863 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7864 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7865 /*isSigned=*/false);
7866 CurOffsets.push_back(Offset);
7867 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7868 CurStrides.push_back(CurStrides.back());
7869 continue;
7870 }
7871
7872 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7873
7874 if (!OASE)
7875 continue;
7876
7877 // Offset
7878 const Expr *OffsetExpr = OASE->getLowerBound();
7879 llvm::Value *Offset = nullptr;
7880 if (!OffsetExpr) {
7881 // If offset is absent, then we just set it to zero.
7882 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7883 } else {
7884 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7885 CGF.Int64Ty,
7886 /*isSigned=*/false);
7887 }
7888 CurOffsets.push_back(Offset);
7889
7890 // Count
7891 const Expr *CountExpr = OASE->getLength();
7892 llvm::Value *Count = nullptr;
7893 if (!CountExpr) {
7894 // In Clang, once a higher dimension is an array section, we construct
7895 // all the lower dimensions as array sections too. However, for a case
7896 // like arr[0:2][2], Clang constructs the inner dimension as an array
7897 // section even though it is not one according to the spec.
7898 if (!OASE->getColonLocFirst().isValid() &&
7899 !OASE->getColonLocSecond().isValid()) {
7900 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7901 } else {
7902 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7903 // When the length is absent it defaults to ⌈(size −
7904 // lower-bound)/stride⌉, where size is the size of the array
7905 // dimension.
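// Worked example (illustrative): for 'int a[10]' and the section
// 'a[2::4]', size = 10, lower-bound = 2, and stride = 4, so the
// defaulted length is (10 - 2) / 4 = 2 (elements 2 and 6).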
7906 const Expr *StrideExpr = OASE->getStride();
7907 llvm::Value *Stride =
7908 StrideExpr
7909 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7910 CGF.Int64Ty, /*isSigned=*/false)
7911 : nullptr;
7912 if (Stride)
7913 Count = CGF.Builder.CreateUDiv(
7914 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7915 else
7916 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7917 }
7918 } else {
7919 Count = CGF.EmitScalarExpr(CountExpr);
7920 }
7921 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7922 CurCounts.push_back(Count);
7923
7924 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7925 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7926 // Offset Count Stride
7927 // D0 0 1 4 (int) <- dummy dimension
7928 // D1 0 2 8 (2 * (1) * 4)
7929 // D2 1 2 20 (1 * (1 * 5) * 4)
7930 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7931 const Expr *StrideExpr = OASE->getStride();
7932 llvm::Value *Stride =
7933 StrideExpr
7934 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7935 CGF.Int64Ty, /*isSigned=*/false)
7936 : nullptr;
7937 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7938 if (Stride)
7939 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7940 else
7941 CurStrides.push_back(DimProd);
7942 if (DI != DimSizes.end())
7943 ++DI;
7944 }
7945
7946 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7947 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7948 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7949 }
7950
7951 /// Return the adjusted map modifiers if the declaration a capture refers to
7952 /// appears in a first-private clause. This is expected to be used only with
7953 /// directives that start with 'target'.
7954 OpenMPOffloadMappingFlags
7955 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7956 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7957
7958 // A firstprivate variable captured by reference will use only the
7959 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7960 // declaration is known to be firstprivate in this handler.
7961 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7962 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7963 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7964 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7965 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7966 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7967 }
7968 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7969 if (I != LambdasMap.end())
7970 // For map(to: lambda): use the user-specified map type.
7971 return getMapTypeBits(
7972 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7973 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7974 /*AddPtrFlag=*/false,
7975 /*AddIsTargetParamFlag=*/false,
7976 /*isNonContiguous=*/false);
7977 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7978 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7979 }
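// Illustrative example (assumed): for
//   int *p; struct T t;
//   #pragma omp target firstprivate(p, t)
// the pointer capture 'p' yields TO | PTR_AND_OBJ, the aggregate capture
// 't' yields PRIVATE | TO, and captures not known to be firstprivate fall
// back to TO | FROM.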
7980
7981 void getPlainLayout(const CXXRecordDecl *RD,
7982 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7983 bool AsBase) const {
7984 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7985
7986 llvm::StructType *St =
7987 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7988
7989 unsigned NumElements = St->getNumElements();
7990 llvm::SmallVector<
7991 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7992 RecordLayout(NumElements);
7993
7994 // Fill bases.
7995 for (const auto &I : RD->bases()) {
7996 if (I.isVirtual())
7997 continue;
7998
7999 QualType BaseTy = I.getType();
8000 const auto *Base = BaseTy->getAsCXXRecordDecl();
8001 // Ignore empty bases.
8002 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8003 CGF.getContext()
8004 .getASTRecordLayout(Base)
8005 .getNonVirtualSize()
8006 .isZero())
8007 continue;
8008
8009 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8010 RecordLayout[FieldIndex] = Base;
8011 }
8012 // Fill in virtual bases.
8013 for (const auto &I : RD->vbases()) {
8014 QualType BaseTy = I.getType();
8015 // Ignore empty bases.
8016 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8017 continue;
8018
8019 const auto *Base = BaseTy->getAsCXXRecordDecl();
8020 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8021 if (RecordLayout[FieldIndex])
8022 continue;
8023 RecordLayout[FieldIndex] = Base;
8024 }
8025 // Fill in all the fields.
8026 assert(!RD->isUnion() && "Unexpected union.");
8027 for (const auto *Field : RD->fields()) {
8028 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8029 // will fill in later.)
8030 if (!Field->isBitField() &&
8031 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8032 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8033 RecordLayout[FieldIndex] = Field;
8034 }
8035 }
8036 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8037 &Data : RecordLayout) {
8038 if (Data.isNull())
8039 continue;
8040 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8041 getPlainLayout(Base, Layout, /*AsBase=*/true);
8042 else
8043 Layout.push_back(cast<const FieldDecl *>(Data));
8044 }
8045 }
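// Illustrative sketch (assumed types): for
//   struct A { int a; };
//   struct B : A { int b; };
// getPlainLayout(B, Layout, /*AsBase=*/false) appends A::a (recursing into
// the non-virtual base) and then B::b, i.e. fields in memory order with
// empty bases and bit-fields skipped.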
8046
8047 /// Generate all the base pointers, section pointers, sizes, map types, and
8048 /// mappers for the extracted mappable expressions (all included in \a
8049 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8050 /// pair of the relevant declaration and index where it occurs is appended to
8051 /// the device pointers info array.
8052 void generateAllInfoForClauses(
8053 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8054 llvm::OpenMPIRBuilder &OMPBuilder,
8055 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8056 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8057 // We have to process the component lists that relate with the same
8058 // declaration in a single chunk so that we can generate the map flags
8059 // correctly. Therefore, we organize all lists in a map.
8060 enum MapKind { Present, Allocs, Other, Total };
8061 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8062 SmallVector<SmallVector<MapInfo, 8>, 4>>
8063 Info;
8064
8065 // Helper function to fill the information map for the different supported
8066 // clauses.
8067 auto &&InfoGen =
8068 [&Info, &SkipVarSet](
8069 const ValueDecl *D, MapKind Kind,
8070 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8071 OpenMPMapClauseKind MapType,
8072 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8073 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8074 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8075 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8076 if (SkipVarSet.contains(D))
8077 return;
8078 auto It = Info.try_emplace(D, Total).first;
8079 It->second[Kind].emplace_back(
8080 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8081 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8082 };
8083
8084 for (const auto *Cl : Clauses) {
8085 const auto *C = dyn_cast<OMPMapClause>(Cl);
8086 if (!C)
8087 continue;
8088 MapKind Kind = Other;
8089 if (llvm::is_contained(C->getMapTypeModifiers(),
8090 OMPC_MAP_MODIFIER_present))
8091 Kind = Present;
8092 else if (C->getMapType() == OMPC_MAP_alloc)
8093 Kind = Allocs;
8094 const auto *EI = C->getVarRefs().begin();
8095 for (const auto L : C->component_lists()) {
8096 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8097 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8098 C->getMapTypeModifiers(), {},
8099 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8100 E);
8101 ++EI;
8102 }
8103 }
8104 for (const auto *Cl : Clauses) {
8105 const auto *C = dyn_cast<OMPToClause>(Cl);
8106 if (!C)
8107 continue;
8108 MapKind Kind = Other;
8109 if (llvm::is_contained(C->getMotionModifiers(),
8110 OMPC_MOTION_MODIFIER_present))
8111 Kind = Present;
8112 const auto *EI = C->getVarRefs().begin();
8113 for (const auto L : C->component_lists()) {
8114 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8115 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8116 C->isImplicit(), std::get<2>(L), *EI);
8117 ++EI;
8118 }
8119 }
8120 for (const auto *Cl : Clauses) {
8121 const auto *C = dyn_cast<OMPFromClause>(Cl);
8122 if (!C)
8123 continue;
8124 MapKind Kind = Other;
8125 if (llvm::is_contained(C->getMotionModifiers(),
8126 OMPC_MOTION_MODIFIER_present))
8127 Kind = Present;
8128 const auto *EI = C->getVarRefs().begin();
8129 for (const auto L : C->component_lists()) {
8130 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8131 C->getMotionModifiers(),
8132 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8133 *EI);
8134 ++EI;
8135 }
8136 }
8137
8138 // Look at the use_device_ptr and use_device_addr clauses information and
8139 // mark the existing map entries as such. If there is no map information for
8140 // an entry in the use_device_ptr and use_device_addr list, we create one
8141 // with map type 'alloc' and zero size section. It is the user's fault if that
8142 // was not mapped before. If there is no map information and the pointer is
8143 // a struct member, then we defer the emission of that entry until the whole
8144 // struct has been processed.
8145 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8146 SmallVector<DeferredDevicePtrEntryTy, 4>>
8147 DeferredInfo;
8148 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8149
8150 auto &&UseDeviceDataCombinedInfoGen =
8151 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8152 CodeGenFunction &CGF, bool IsDevAddr) {
8153 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8154 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8155 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8156 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8157 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8158 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8159 UseDeviceDataCombinedInfo.Sizes.push_back(
8160 llvm::Constant::getNullValue(CGF.Int64Ty));
8161 UseDeviceDataCombinedInfo.Types.push_back(
8162 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8163 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8164 };
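// Illustrative usage (assumed): for
//   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
// the entry generated here is RETURN_PARAM with a zero size, which asks
// the runtime to hand back the translated device address of 'p' rather
// than to map any storage.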
8165
8166 auto &&MapInfoGen =
8167 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8168 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8169 OMPClauseMappableExprCommon::MappableExprComponentListRef
8170 Components,
8171 bool IsImplicit, bool IsDevAddr) {
8172 // We didn't find any match in our map information; generate a
8173 // zero-size array section. If the pointer is a struct member, we defer
8174 // this action until the whole struct has been processed.
8175 if (isa<MemberExpr>(IE)) {
8176 // Insert the pointer into Info to be processed by
8177 // generateInfoForComponentList. Because it is a member pointer
8178 // without a pointee, no entry will be generated for it, therefore
8179 // we need to generate one after the whole struct has been
8180 // processed. Nonetheless, generateInfoForComponentList must be
8181 // called to take the pointer into account for the calculation of
8182 // the range of the partial struct.
8183 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8184 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8185 IsDevAddr);
8186 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8187 } else {
8188 llvm::Value *Ptr;
8189 if (IsDevAddr) {
8190 if (IE->isGLValue())
8191 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8192 else
8193 Ptr = CGF.EmitScalarExpr(IE);
8194 } else {
8195 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8196 }
8197 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8198 }
8199 };
8200
8201 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8202 const Expr *IE, bool IsDevAddr) -> bool {
8203 // We potentially have map information for this declaration already.
8204 // Look for the first set of components that refer to it. If found,
8205 // return true.
8206 // If the first component is a member expression, we have to look into
8207 // 'this', which maps to null in the map of map information. Otherwise
8208 // look directly for the information.
8209 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8210 if (It != Info.end()) {
8211 bool Found = false;
8212 for (auto &Data : It->second) {
8213 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8214 return MI.Components.back().getAssociatedDeclaration() == VD;
8215 });
8216 // If we found a map entry, signal that the pointer has to be
8217 // returned and move on to the next declaration. Exclude cases where
8218 // the base pointer is mapped as array subscript, array section or
8219 // array shaping. The base address is passed as a pointer to base in
8220 // this case and cannot be used as a base for use_device_ptr list
8221 // item.
8222 if (CI != Data.end()) {
8223 if (IsDevAddr) {
8224 CI->ForDeviceAddr = IsDevAddr;
8225 CI->ReturnDevicePointer = true;
8226 Found = true;
8227 break;
8228 } else {
8229 auto PrevCI = std::next(CI->Components.rbegin());
8230 const auto *VarD = dyn_cast<VarDecl>(VD);
8231 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8232 isa<MemberExpr>(IE) ||
8233 !VD->getType().getNonReferenceType()->isPointerType() ||
8234 PrevCI == CI->Components.rend() ||
8235 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8236 VarD->hasLocalStorage()) {
8237 CI->ForDeviceAddr = IsDevAddr;
8238 CI->ReturnDevicePointer = true;
8239 Found = true;
8240 break;
8241 }
8242 }
8243 }
8244 }
8245 return Found;
8246 }
8247 return false;
8248 };
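// Illustrative example of the exclusion above (assumed): for a global
//   int *p;
//   #pragma omp target data map(to: p[0:n]) use_device_ptr(p)
// the map clause maps the pointee through the base pointer, so the
// existing entry cannot serve as the use_device_ptr item and a separate
// zero-size entry is generated for 'p' instead.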
8249
8250 // Look at the use_device_ptr clause information and mark the existing map
8251 // entries as such. If there is no map information for an entry in the
8252 // use_device_ptr list, we create one with map type 'alloc' and zero size
8253 // section. It is the user's fault if that was not mapped before. If there is
8254 // no map information and the pointer is a struct member, then we defer the
8255 // emission of that entry until the whole struct has been processed.
8256 for (const auto *Cl : Clauses) {
8257 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8258 if (!C)
8259 continue;
8260 for (const auto L : C->component_lists()) {
8261 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8262 std::get<1>(L);
8263 assert(!Components.empty() &&
8264 "Not expecting empty list of components!");
8265 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8266 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8267 const Expr *IE = Components.back().getAssociatedExpression();
8268 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8269 continue;
8270 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8271 /*IsDevAddr=*/false);
8272 }
8273 }
8274
8275 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8276 for (const auto *Cl : Clauses) {
8277 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8278 if (!C)
8279 continue;
8280 for (const auto L : C->component_lists()) {
8281 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8282 std::get<1>(L);
8283 assert(!std::get<1>(L).empty() &&
8284 "Not expecting empty list of components!");
8285 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8286 if (!Processed.insert(VD).second)
8287 continue;
8288 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8289 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8290 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8291 continue;
8292 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8293 /*IsDevAddr=*/true);
8294 }
8295 }
8296
8297 for (const auto &Data : Info) {
8298 StructRangeInfoTy PartialStruct;
8299 // Current struct information:
8300 MapCombinedInfoTy CurInfo;
8301 // Current struct base information:
8302 MapCombinedInfoTy StructBaseCurInfo;
8303 const Decl *D = Data.first;
8304 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8305 bool HasMapBasePtr = false;
8306 bool HasMapArraySec = false;
8307 if (VD && VD->getType()->isAnyPointerType()) {
8308 for (const auto &M : Data.second) {
8309 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8310 return isa_and_present<DeclRefExpr>(L.VarRef);
8311 });
8312 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8313 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8314 L.VarRef);
8315 });
8316 if (HasMapBasePtr && HasMapArraySec)
8317 break;
8318 }
8319 }
8320 for (const auto &M : Data.second) {
8321 for (const MapInfo &L : M) {
8322 assert(!L.Components.empty() &&
8323 "Not expecting declaration with no component lists.");
8324
8325 // Remember the current base pointer index.
8326 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8327 unsigned StructBasePointersIdx =
8328 StructBaseCurInfo.BasePointers.size();
8329 CurInfo.NonContigInfo.IsNonContiguous =
8330 L.Components.back().isNonContiguous();
8331 generateInfoForComponentList(
8332 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8333 CurInfo, StructBaseCurInfo, PartialStruct,
8334 /*IsFirstComponentList=*/false, L.IsImplicit,
8335 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8336 L.VarRef, /*OverlappedElements*/ {},
8337 HasMapBasePtr && HasMapArraySec);
8338
8339 // If this entry relates to a device pointer, set the relevant
8340 // declaration and add the 'return pointer' flag.
8341 if (L.ReturnDevicePointer) {
8342 // Check whether a value was added to either CurInfo or
8343 // StructBaseCurInfo and error if no value was added to either of
8344 // them:
8345 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8346 StructBasePointersIdx <
8347 StructBaseCurInfo.BasePointers.size()) &&
8348 "Unexpected number of mapped base pointers.");
8349
8350 // Choose a base pointer index which is always valid:
8351 const ValueDecl *RelevantVD =
8352 L.Components.back().getAssociatedDeclaration();
8353 assert(RelevantVD &&
8354 "No relevant declaration related with device pointer??");
8355
8356 // If StructBaseCurInfo has been updated this iteration then work on
8357 // the first new entry added to it i.e. make sure that when multiple
8358 // values are added to any of the lists, the first value added is
8359 // being modified by the assignments below (not the last value
8360 // added).
8361 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8362 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8363 RelevantVD;
8364 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8365 L.ForDeviceAddr ? DeviceInfoTy::Address
8366 : DeviceInfoTy::Pointer;
8367 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8368 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8369 } else {
8370 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8371 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8372 L.ForDeviceAddr ? DeviceInfoTy::Address
8373 : DeviceInfoTy::Pointer;
8374 CurInfo.Types[CurrentBasePointersIdx] |=
8375 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8376 }
8377 }
8378 }
8379 }
8380
8381 // Append any pending zero-length pointers which are struct members and
8382 // used with use_device_ptr or use_device_addr.
8383 auto CI = DeferredInfo.find(Data.first);
8384 if (CI != DeferredInfo.end()) {
8385 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8386 llvm::Value *BasePtr;
8387 llvm::Value *Ptr;
8388 if (L.ForDeviceAddr) {
8389 if (L.IE->isGLValue())
8390 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8391 else
8392 Ptr = this->CGF.EmitScalarExpr(L.IE);
8393 BasePtr = Ptr;
8394 // Entry is RETURN_PARAM. Also, set the placeholder value
8395 // MEMBER_OF=FFFF so that the entry is later updated with the
8396 // correct value of MEMBER_OF.
8397 CurInfo.Types.push_back(
8398 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8399 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8400 } else {
8401 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8402 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8403 L.IE->getExprLoc());
8404 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8405 // placeholder value MEMBER_OF=FFFF so that the entry is later
8406 // updated with the correct value of MEMBER_OF.
8407 CurInfo.Types.push_back(
8408 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8409 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8410 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8411 }
8412 CurInfo.Exprs.push_back(L.VD);
8413 CurInfo.BasePointers.emplace_back(BasePtr);
8414 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8415 CurInfo.DevicePointers.emplace_back(
8416 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8417 CurInfo.Pointers.push_back(Ptr);
8418 CurInfo.Sizes.push_back(
8419 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8420 CurInfo.Mappers.push_back(nullptr);
8421 }
8422 }
8423
8424 // Unify entries in one list, making sure the struct mapping precedes the
8425 // individual fields:
8426 MapCombinedInfoTy UnionCurInfo;
8427 UnionCurInfo.append(StructBaseCurInfo);
8428 UnionCurInfo.append(CurInfo);
8429
8430 // If there is an entry in PartialStruct it means we have a struct with
8431 // individual members mapped. Emit an extra combined entry.
8432 if (PartialStruct.Base.isValid()) {
8433 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8434 // Emit a combined entry:
8435 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8436 /*IsMapThis*/ !VD, OMPBuilder, VD);
8437 }
8438
8439 // We need to append the results of this capture to what we already have.
8440 CombinedInfo.append(UnionCurInfo);
8441 }
8442 // Append data for use_device_ptr clauses.
8443 CombinedInfo.append(UseDeviceDataCombinedInfo);
8444 }
8445
8446 public:
8447 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8448 : CurDir(&Dir), CGF(CGF) {
8449 // Extract firstprivate clause information.
8450 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8451 for (const auto *D : C->varlist())
8452 FirstPrivateDecls.try_emplace(
8453 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8454 // Extract implicit firstprivates from uses_allocators clauses.
8455 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8456 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8457 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8458 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8459 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8460 /*Implicit=*/true);
8461 else if (const auto *VD = dyn_cast<VarDecl>(
8462 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8463 ->getDecl()))
8464 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8465 }
8466 }
8467 // Extract device pointer clause information.
8468 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8469 for (auto L : C->component_lists())
8470 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8471 // Extract device addr clause information.
8472 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8473 for (auto L : C->component_lists())
8474 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8475 // Extract map information.
8476 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8477 if (C->getMapType() != OMPC_MAP_to)
8478 continue;
8479 for (auto L : C->component_lists()) {
8480 const ValueDecl *VD = std::get<0>(L);
8481 const auto *RD = VD ? VD->getType()
8482 .getCanonicalType()
8483 .getNonReferenceType()
8484 ->getAsCXXRecordDecl()
8485 : nullptr;
8486 if (RD && RD->isLambda())
8487 LambdasMap.try_emplace(std::get<0>(L), C);
8488 }
8489 }
8490 }
8491
8492 /// Constructor for the declare mapper directive.
8493 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8494 : CurDir(&Dir), CGF(CGF) {}
8495
8496 /// Generate code for the combined entry if we have a partially mapped struct
8497 /// and take care of the mapping flags of the arguments corresponding to
8498 /// individual struct members.
8499 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8500 MapFlagsArrayTy &CurTypes,
8501 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8502 llvm::OpenMPIRBuilder &OMPBuilder,
8503 const ValueDecl *VD = nullptr,
8504 unsigned OffsetForMemberOfFlag = 0,
8505 bool NotTargetParams = true) const {
8506 if (CurTypes.size() == 1 &&
8507 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8508 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8509 !PartialStruct.IsArraySection)
8510 return;
8511 Address LBAddr = PartialStruct.LowestElem.second;
8512 Address HBAddr = PartialStruct.HighestElem.second;
8513 if (PartialStruct.HasCompleteRecord) {
8514 LBAddr = PartialStruct.LB;
8515 HBAddr = PartialStruct.LB;
8516 }
8517 CombinedInfo.Exprs.push_back(VD);
8518 // Base is the base of the struct
8519 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8520 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8521 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8522 // Pointer is the address of the lowest element
8523 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8524 const CXXMethodDecl *MD =
8525 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8526 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8527 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8528 // There should not be a mapper for a combined entry.
8529 if (HasBaseClass) {
8530 // OpenMP 5.2 148:21:
8531 // If the target construct is within a class non-static member function,
8532 // and a variable is an accessible data member of the object for which the
8533 // non-static data member function is invoked, the variable is treated as
8534 // if the this[:1] expression had appeared in a map clause with a map-type
8535 // of tofrom.
8536 // Emit this[:1]
8537 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8538 QualType Ty = MD->getFunctionObjectParameterType();
8539 llvm::Value *Size =
8540 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8541 /*isSigned=*/true);
8542 CombinedInfo.Sizes.push_back(Size);
8543 } else {
8544 CombinedInfo.Pointers.push_back(LB);
8545 // Size is (addr of {highest+1} element) - (addr of lowest element)
8546 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8547 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8548 HBAddr.getElementType(), HB, /*Idx0=*/1);
8549 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8550 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8551 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8552 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8553 /*isSigned=*/false);
8554 CombinedInfo.Sizes.push_back(Size);
8555 }
8556 CombinedInfo.Mappers.push_back(nullptr);
8557 // The map type is always TARGET_PARAM when generating info for captures.
8558 CombinedInfo.Types.push_back(
8559 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8560 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8561 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8562 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8563 // If any element has the present modifier, then make sure the runtime
8564 // doesn't attempt to allocate the struct.
8565 if (CurTypes.end() !=
8566 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8567 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8568 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8569 }))
8570 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8571 // Remove TARGET_PARAM flag from the first element
8572 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8573 // If any element has the ompx_hold modifier, then make sure the runtime
8574 // uses the hold reference count for the struct as a whole so that it won't
8575 // be unmapped by an extra dynamic reference count decrement. Add it to all
8576 // elements as well so the runtime knows which reference count to check
8577 // when determining whether it's time for device-to-host transfers of
8578 // individual elements.
8579 if (CurTypes.end() !=
8580 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8581 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8582 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8583 })) {
8584 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8585 for (auto &M : CurTypes)
8586 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8587 }
8588
8589 // All other current entries will be MEMBER_OF the combined entry
8590 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8591 // 0xFFFF in the MEMBER_OF field).
8592 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
8593 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
8594 for (auto &M : CurTypes)
8595 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8596 }
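// Illustrative example (assumed): for 'struct S { int x; int y; } s;' and
//   #pragma omp target map(tofrom: s.x, s.y)
// this emits one combined entry spanning [&s.x, &s.y + 1) and rewrites the
// MEMBER_OF bits of the member entries to reference it.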
8597
8598 /// Generate all the base pointers, section pointers, sizes, map types, and
8599 /// mappers for the extracted mappable expressions (all included in \a
8600 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8601 /// pair of the relevant declaration and index where it occurs is appended to
8602 /// the device pointers info array.
8603 void generateAllInfo(
8604 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8605 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8606 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8607 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8608 "Expect a executable directive");
8609 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8610 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8611 SkipVarSet);
8612 }
8613
8614 /// Generate all the base pointers, section pointers, sizes, map types, and
8615 /// mappers for the extracted map clauses of user-defined mapper (all included
8616 /// in \a CombinedInfo).
8617 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8618 llvm::OpenMPIRBuilder &OMPBuilder) const {
8619 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8620 "Expect a declare mapper directive");
8621 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8622 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8623 OMPBuilder);
8624 }
8625
8626 /// Emit capture info for lambdas for variables captured by reference.
8627 void generateInfoForLambdaCaptures(
8628 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8629 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8630 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8631 const auto *RD = VDType->getAsCXXRecordDecl();
8632 if (!RD || !RD->isLambda())
8633 return;
8634 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8635 CGF.getContext().getDeclAlign(VD));
8636 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8637 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8638 FieldDecl *ThisCapture = nullptr;
8639 RD->getCaptureFields(Captures, ThisCapture);
8640 if (ThisCapture) {
8641 LValue ThisLVal =
8642 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8643 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8644 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8645 VDLVal.getPointer(CGF));
8646 CombinedInfo.Exprs.push_back(VD);
8647 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8648 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8649 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8650 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8651 CombinedInfo.Sizes.push_back(
8652 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8653 CGF.Int64Ty, /*isSigned=*/true));
8654 CombinedInfo.Types.push_back(
8655 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8656 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8657 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8658 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8659 CombinedInfo.Mappers.push_back(nullptr);
8660 }
8661 for (const LambdaCapture &LC : RD->captures()) {
8662 if (!LC.capturesVariable())
8663 continue;
8664 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8665 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8666 continue;
8667 auto It = Captures.find(VD);
8668 assert(It != Captures.end() && "Found lambda capture without field.");
8669 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8670 if (LC.getCaptureKind() == LCK_ByRef) {
8671 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8672 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8673 VDLVal.getPointer(CGF));
8674 CombinedInfo.Exprs.push_back(VD);
8675 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8676 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8677 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8678 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8679 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8680 CGF.getTypeSize(
8681 VD->getType().getCanonicalType().getNonReferenceType()),
8682 CGF.Int64Ty, /*isSigned=*/true));
8683 } else {
8684 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8685 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8686 VDLVal.getPointer(CGF));
8687 CombinedInfo.Exprs.push_back(VD);
8688 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8689 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8690 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8691 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8692 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8693 }
8694 CombinedInfo.Types.push_back(
8695 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8696 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8697 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8698 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8699 CombinedInfo.Mappers.push_back(nullptr);
8700 }
8701 }
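// Illustrative example (assumed): for
//   int x = 0;
//   auto l = [&x]() { return x; };
//   #pragma omp target map(to: l)
// the capture field for 'x' receives a PTR_AND_OBJ | LITERAL | MEMBER_OF |
// IMPLICIT entry so that the device copy of the lambda ends up pointing at
// the mapped 'x'.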
8702
8703 /// Set correct indices for lambdas captures.
8704 void adjustMemberOfForLambdaCaptures(
8705 llvm::OpenMPIRBuilder &OMPBuilder,
8706 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8707 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8708 MapFlagsArrayTy &Types) const {
8709 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8710 // Set correct member_of idx for all implicit lambda captures.
8711 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8712 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8713 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8714 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8715 continue;
8716 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8717 assert(BasePtr && "Unable to find base lambda address.");
8718 int TgtIdx = -1;
8719 for (unsigned J = I; J > 0; --J) {
8720 unsigned Idx = J - 1;
8721 if (Pointers[Idx] != BasePtr)
8722 continue;
8723 TgtIdx = Idx;
8724 break;
8725 }
8726 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8727 // All other current entries will be MEMBER_OF the combined entry
8728 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8729 // 0xFFFF in the MEMBER_OF field).
8730 OpenMPOffloadMappingFlags MemberOfFlag =
8731 OMPBuilder.getMemberOfFlag(TgtIdx);
8732 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8733 }
8734 }
8735
8736 /// For a capture that has an associated clause, generate the base pointers,
8737 /// section pointers, sizes, map types, and mappers (all included in
8738 /// \a CurCaptureVarInfo).
8739 void generateInfoForCaptureFromClauseInfo(
8740 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
8741 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8742 unsigned OffsetForMemberOfFlag) const {
8743 assert(!Cap->capturesVariableArrayType() &&
8744 "Not expecting to generate map info for a variable array type!");
8745
8746 // We need to know when we are generating information for the first component.
8747 const ValueDecl *VD = Cap->capturesThis()
8748 ? nullptr
8749 : Cap->getCapturedVar()->getCanonicalDecl();
8750
8751 // For map(to: lambda): skip it here; it is processed in
8752 // generateDefaultMapInfo.
8753 if (LambdasMap.count(VD))
8754 return;
8755
8756 // If this declaration appears in an is_device_ptr clause, we just have to
8757 // pass the pointer by value. If it is a reference to a declaration, we just
8758 // pass its value.
8759 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8760 CurCaptureVarInfo.Exprs.push_back(VD);
8761 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
8762 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
8763 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8764 CurCaptureVarInfo.Pointers.push_back(Arg);
8765 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8766 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8767 /*isSigned=*/true));
8768 CurCaptureVarInfo.Types.push_back(
8769 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8770 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8771 CurCaptureVarInfo.Mappers.push_back(nullptr);
8772 return;
8773 }
8774
8775 MapDataArrayTy DeclComponentLists;
8776 // For member field lists in is_device_ptr, store them in
8777 // DeclComponentLists for generating component info.
8778 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8779 auto It = DevPointersMap.find(VD);
8780 if (It != DevPointersMap.end())
8781 for (const auto &MCL : It->second)
8782 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8783 /*IsImplicit=*/true, nullptr,
8784 nullptr);
8785 auto I = HasDevAddrsMap.find(VD);
8786 if (I != HasDevAddrsMap.end())
8787 for (const auto &MCL : I->second)
8788 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8789 /*IsImplicit=*/true, nullptr,
8790 nullptr);
8791 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8792 "Expect a executable directive");
8793 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8794 bool HasMapBasePtr = false;
8795 bool HasMapArraySec = false;
8796 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8797 const auto *EI = C->getVarRefs().begin();
8798 for (const auto L : C->decl_component_lists(VD)) {
8799 const ValueDecl *VDecl, *Mapper;
8800 // The expression is not valid if the mapping is implicit.
8801 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8802 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8803 std::tie(VDecl, Components, Mapper) = L;
8804 assert(VDecl == VD && "We got information for the wrong declaration??");
8805 assert(!Components.empty() &&
8806 "Not expecting declaration with no component lists.");
8807 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8808 HasMapBasePtr = true;
8809 if (VD && E && VD->getType()->isAnyPointerType() &&
8810 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8811 HasMapArraySec = true;
8812 DeclComponentLists.emplace_back(Components, C->getMapType(),
8813 C->getMapTypeModifiers(),
8814 C->isImplicit(), Mapper, E);
8815 ++EI;
8816 }
8817 }
8818 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8819 const MapData &RHS) {
8820 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8821 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8822 bool HasPresent =
8823 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8824 bool HasAllocs = MapType == OMPC_MAP_alloc;
8825 MapModifiers = std::get<2>(RHS);
8826 MapType = std::get<1>(LHS);
8827 bool HasPresentR =
8828 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8829 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8830 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8831 });
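// Roughly: component lists carrying the 'present' modifier sort first,
// followed by 'alloc' maps, so the stricter entries are generated before
// the rest (an informal reading of the comparator above).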
8832
8833 auto GenerateInfoForComponentLists =
8834 [&](ArrayRef<MapData> DeclComponentLists,
8835 bool IsEligibleForTargetParamFlag) {
8836 MapCombinedInfoTy CurInfoForComponentLists;
8837 StructRangeInfoTy PartialStruct;
8838
8839 if (DeclComponentLists.empty())
8840 return;
8841
8842 generateInfoForCaptureFromComponentLists(
8843 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
8844 IsEligibleForTargetParamFlag,
8845 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
8846
8847 // If there is an entry in PartialStruct it means we have a
8848 // struct with individual members mapped. Emit an extra combined
8849 // entry.
8850 if (PartialStruct.Base.isValid()) {
8851 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
8852 emitCombinedEntry(
8853 CurCaptureVarInfo, CurInfoForComponentLists.Types,
8854 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
8855 OffsetForMemberOfFlag,
8856 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
8857 }
8858
8859 // Return if we didn't add any entries.
8860 if (CurInfoForComponentLists.BasePointers.empty())
8861 return;
8862
8863 CurCaptureVarInfo.append(CurInfoForComponentLists);
8864 };
8865
8866 GenerateInfoForComponentLists(DeclComponentLists,
8867 /*IsEligibleForTargetParamFlag=*/true);
8868 }
8869
8870 /// Generate the base pointers, section pointers, sizes, map types, and
8871 /// mappers associated with \a DeclComponentLists for a given capture
8872 /// \a VD (all included in \a CurComponentListInfo).
8873 void generateInfoForCaptureFromComponentLists(
8874 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
8875 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
8876 bool IsListEligibleForTargetParamFlag,
8877 bool AreBothBasePtrAndPteeMapped = false) const {
8878 // Find overlapping elements (including the offset from the base element).
8879 llvm::SmallDenseMap<
8880 const MapData *,
8881 llvm::SmallVector<
8882 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8883 4>
8884 OverlappedData;
8885 size_t Count = 0;
8886 for (const MapData &L : DeclComponentLists) {
8887 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8888 OpenMPMapClauseKind MapType;
8889 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8890 bool IsImplicit;
8891 const ValueDecl *Mapper;
8892 const Expr *VarRef;
8893 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8894 L;
8895 ++Count;
8896 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8897 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8898 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8899 VarRef) = L1;
8900 auto CI = Components.rbegin();
8901 auto CE = Components.rend();
8902 auto SI = Components1.rbegin();
8903 auto SE = Components1.rend();
8904 for (; CI != CE && SI != SE; ++CI, ++SI) {
8905 if (CI->getAssociatedExpression()->getStmtClass() !=
8906 SI->getAssociatedExpression()->getStmtClass())
8907 break;
8908 // Are we dealing with different variables/fields?
8909 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8910 break;
8911 }
8912 // We found an overlap if, for at least one of the lists, we reached
8913 // the head of its components list.
8914 if (CI == CE || SI == SE) {
8915 // Ignore it if it is the same component.
8916 if (CI == CE && SI == SE)
8917 continue;
8918 const auto It = (SI == SE) ? CI : SI;
8919 // If one component is a pointer and the other is some kind of
8920 // dereference of that pointer (array subscript, section, dereference,
8921 // etc.), it is not an overlap.
8922 // Likewise if one component is a base and the other is a dereferenced
8923 // pointer MemberExpr with the same base.
8924 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8925 (std::prev(It)->getAssociatedDeclaration() &&
8926 std::prev(It)
8927 ->getAssociatedDeclaration()
8928 ->getType()
8929 ->isPointerType()) ||
8930 (It->getAssociatedDeclaration() &&
8931 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8932 std::next(It) != CE && std::next(It) != SE))
8933 continue;
8934 const MapData &BaseData = CI == CE ? L : L1;
8935 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8936 SI == SE ? Components : Components1;
8937 OverlappedData[&BaseData].push_back(SubData);
8938 }
8939 }
8940 }
8941 // Sort the overlapped elements for each item.
8942 llvm::SmallVector<const FieldDecl *, 4> Layout;
8943 if (!OverlappedData.empty()) {
8944 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8945 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8946 while (BaseType != OrigType) {
8947 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8948 OrigType = BaseType->getPointeeOrArrayElementType();
8949 }
8950
8951 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8952 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8953 else {
8954 const auto *RD = BaseType->getAsRecordDecl();
8955 Layout.append(RD->field_begin(), RD->field_end());
8956 }
8957 }
8958 for (auto &Pair : OverlappedData) {
8959 llvm::stable_sort(
8960 Pair.getSecond(),
8961 [&Layout](
8962 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8963 OMPClauseMappableExprCommon::MappableExprComponentListRef
8964 Second) {
8965 auto CI = First.rbegin();
8966 auto CE = First.rend();
8967 auto SI = Second.rbegin();
8968 auto SE = Second.rend();
8969 for (; CI != CE && SI != SE; ++CI, ++SI) {
8970 if (CI->getAssociatedExpression()->getStmtClass() !=
8971 SI->getAssociatedExpression()->getStmtClass())
8972 break;
8973 // Are we dealing with different variables/fields?
8974 if (CI->getAssociatedDeclaration() !=
8975 SI->getAssociatedDeclaration())
8976 break;
8977 }
8978
8979 // Lists contain the same elements.
8980 if (CI == CE && SI == SE)
8981 return false;
8982
8983 // A list with fewer elements is less than a list with more elements.
8984 if (CI == CE || SI == SE)
8985 return CI == CE;
8986
8987 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8988 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8989 if (FD1->getParent() == FD2->getParent())
8990 return FD1->getFieldIndex() < FD2->getFieldIndex();
8991 const auto *It =
8992 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8993 return FD == FD1 || FD == FD2;
8994 });
8995 return *It == FD1;
8996 });
8997 }
8998
8999 // The mapping flags depend on the capture this info is associated with.
9000 // First, go through all of the elements that have overlapped elements.
9001 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9002 MapCombinedInfoTy StructBaseCombinedInfo;
9003 for (const auto &Pair : OverlappedData) {
9004 const MapData &L = *Pair.getFirst();
9005 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9006 OpenMPMapClauseKind MapType;
9007 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9008 bool IsImplicit;
9009 const ValueDecl *Mapper;
9010 const Expr *VarRef;
9011 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9012 L;
9013 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9014 OverlappedComponents = Pair.getSecond();
9015 generateInfoForComponentList(
9016 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9017 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9018 /*GenerateAllInfoForClauses*/ false, Mapper,
9019 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9020 AddTargetParamFlag = false;
9021 }
9022 // Go through other elements without overlapped elements.
9023 for (const MapData &L : DeclComponentLists) {
9024 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9025 OpenMPMapClauseKind MapType;
9026 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9027 bool IsImplicit;
9028 const ValueDecl *Mapper;
9029 const Expr *VarRef;
9030 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9031 L;
9032 auto It = OverlappedData.find(&L);
9033 if (It == OverlappedData.end())
9034 generateInfoForComponentList(
9035 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9036 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9037 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9038 /*ForDeviceAddr=*/false, VD, VarRef,
9039 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9040 AddTargetParamFlag = false;
9041 }
9042 }
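// Illustrative example (assumed): for 'struct S { int x; double y; } s;'
//   #pragma omp target map(tofrom: s) map(to: s.x)
// 's.x' overlaps the whole-struct map of 's', so the struct is emitted
// around the overlapped member while 's.x' keeps its own 'to' entry.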
9043
9044 /// Generate the default map information for a given capture \a CI,
9045 /// record field declaration \a RI and captured value \a CV.
9046 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9047 const FieldDecl &RI, llvm::Value *CV,
9048 MapCombinedInfoTy &CombinedInfo) const {
9049 bool IsImplicit = true;
9050 // Do the default mapping.
9051 if (CI.capturesThis()) {
9052 CombinedInfo.Exprs.push_back(nullptr);
9053 CombinedInfo.BasePointers.push_back(CV);
9054 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9055 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9056 CombinedInfo.Pointers.push_back(CV);
9057 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9058 CombinedInfo.Sizes.push_back(
9059 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9060 CGF.Int64Ty, /*isSigned=*/true));
9061 // Default map type.
9062 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9063 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9064 } else if (CI.capturesVariableByCopy()) {
9065 const VarDecl *VD = CI.getCapturedVar();
9066 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9067 CombinedInfo.BasePointers.push_back(CV);
9068 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9069 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9070 CombinedInfo.Pointers.push_back(CV);
9071 if (!RI.getType()->isAnyPointerType()) {
9072 // We have to signal to the runtime that this capture is passed by value
9073 // and is not a pointer.
9074 CombinedInfo.Types.push_back(
9075 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9076 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9077 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9078 } else {
9079 // Pointers are implicitly mapped with a zero size and no flags
9080 // (other than first map that is added for all implicit maps).
9081 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9082 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9083 }
9084 auto I = FirstPrivateDecls.find(VD);
9085 if (I != FirstPrivateDecls.end())
9086 IsImplicit = I->getSecond();
9087 } else {
9088 assert(CI.capturesVariable() && "Expected captured reference.");
9089 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9090 QualType ElementType = PtrTy->getPointeeType();
9091 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9092 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9093 // The default map type for a scalar/complex type is 'to' because by
9094 // default the value doesn't have to be retrieved. For an aggregate
9095 // type, the default is 'tofrom'.
9096 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9097 const VarDecl *VD = CI.getCapturedVar();
9098 auto I = FirstPrivateDecls.find(VD);
9099 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9100 CombinedInfo.BasePointers.push_back(CV);
9101 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9102 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9103 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9104 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9105 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9106 AlignmentSource::Decl));
9107 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9108 } else {
9109 CombinedInfo.Pointers.push_back(CV);
9110 }
9111 if (I != FirstPrivateDecls.end())
9112 IsImplicit = I->getSecond();
9113 }
9114 // Every default map produces a single argument which is a target parameter.
9115 CombinedInfo.Types.back() |=
9116 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9117
9118 // Add flag stating this is an implicit map.
9119 if (IsImplicit)
9120 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9121
9122 // No user-defined mapper for default mapping.
9123 CombinedInfo.Mappers.push_back(nullptr);
9124 }
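
// As an illustrative sketch (not emitted code), given a region with no
// explicit map clauses:
//
//   int Scalar; int *Ptr; struct S Obj;
//   #pragma omp target
//   { Scalar++; Ptr[0] = 1; Obj.run(); }
//
// the by-copy capture 'Scalar' gets LITERAL | TARGET_PARAM | IMPLICIT, the
// pointer 'Ptr' gets a zero size with TARGET_PARAM | IMPLICIT, and the
// by-reference capture of the aggregate 'Obj' gets the TO | FROM default
// described above (modulo any firstprivate clauses in FirstPrivateDecls).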
9125 };
9126 } // anonymous namespace
9127
9128 // Try to extract the base declaration from a `this->x` expression if possible.
9129 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9130 if (!E)
9131 return nullptr;
9132
9133 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9134 if (const MemberExpr *ME =
9135 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9136 return ME->getMemberDecl();
9137 return nullptr;
9138 }
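
// For example, for a clause such as 'map(this->arr[0:n])' the base of the
// array section is the member expression 'this->arr', so the FieldDecl for
// 'arr' is returned; any other expression shape yields nullptr.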
9139
9140 /// Emit a string constant containing the names of the values mapped to the
9141 /// offloading runtime library.
9142 static llvm::Constant *
9143 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9144 MappableExprsHandler::MappingExprInfo &MapExprs) {
9145
9146 uint32_t SrcLocStrSize;
9147 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9148 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9149
9150 SourceLocation Loc;
9151 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9152 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9153 Loc = VD->getLocation();
9154 else
9155 Loc = MapExprs.getMapExpr()->getExprLoc();
9156 } else {
9157 Loc = MapExprs.getMapDecl()->getLocation();
9158 }
9159
9160 std::string ExprName;
9161 if (MapExprs.getMapExpr()) {
9162 PrintingPolicy P(CGF.getContext().getLangOpts());
9163 llvm::raw_string_ostream OS(ExprName);
9164 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9165 } else {
9166 ExprName = MapExprs.getMapDecl()->getNameAsString();
9167 }
9168
9169 std::string FileName;
9170 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9171 if (auto *DbgInfo = CGF.getDebugInfo())
9172 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9173 else
9174 FileName = PLoc.getFilename();
9175 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9176 PLoc.getColumn(), SrcLocStrSize);
9177 }
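
// As a sketch, for 'map(to: a[0:n])' written at line 12, column 3 of
// example.cpp, the emitted constant is a ';'-separated string of roughly the
// form
//
//   ";example.cpp;a[0:n];12;3;;"
//
// with the exact field order defined by OMPBuilder.getOrCreateSrcLocStr.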
9178 /// Emit the arrays used to pass the captures and map information to the
9179 /// offloading runtime library. If there is no map or capture information,
9180 /// return nullptr by reference.
9181 static void emitOffloadingArraysAndArgs(
9182 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9183 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9184 bool IsNonContiguous = false, bool ForEndCall = false) {
9185 CodeGenModule &CGM = CGF.CGM;
9186
9187 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9188 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9189 CGF.AllocaInsertPt->getIterator());
9190 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9191 CGF.Builder.GetInsertPoint());
9192
9193 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9194 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9195 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9196 }
9197 };
9198
9199 auto CustomMapperCB = [&](unsigned int I) {
9200 llvm::Function *MFunc = nullptr;
9201 if (CombinedInfo.Mappers[I]) {
9202 Info.HasMapper = true;
9203 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9204 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9205 }
9206 return MFunc;
9207 };
9208 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9209 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9210 IsNonContiguous, ForEndCall, DeviceAddrCB));
9211 }
9212
9213 /// Check for inner distribute directive.
9214 static const OMPExecutableDirective *
9215 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9216 const auto *CS = D.getInnermostCapturedStmt();
9217 const auto *Body =
9218 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9219 const Stmt *ChildStmt =
9220 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9221
9222 if (const auto *NestedDir =
9223 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9224 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9225 switch (D.getDirectiveKind()) {
9226 case OMPD_target:
9227 // For now, treat 'target' with nested 'teams loop' as if it's
9228 // distributed (target teams distribute).
9229 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9230 return NestedDir;
9231 if (DKind == OMPD_teams) {
9232 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9233 /*IgnoreCaptured=*/true);
9234 if (!Body)
9235 return nullptr;
9236 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9237 if (const auto *NND =
9238 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9239 DKind = NND->getDirectiveKind();
9240 if (isOpenMPDistributeDirective(DKind))
9241 return NND;
9242 }
9243 }
9244 return nullptr;
9245 case OMPD_target_teams:
9246 if (isOpenMPDistributeDirective(DKind))
9247 return NestedDir;
9248 return nullptr;
9249 case OMPD_target_parallel:
9250 case OMPD_target_simd:
9251 case OMPD_target_parallel_for:
9252 case OMPD_target_parallel_for_simd:
9253 return nullptr;
9254 case OMPD_target_teams_distribute:
9255 case OMPD_target_teams_distribute_simd:
9256 case OMPD_target_teams_distribute_parallel_for:
9257 case OMPD_target_teams_distribute_parallel_for_simd:
9258 case OMPD_parallel:
9259 case OMPD_for:
9260 case OMPD_parallel_for:
9261 case OMPD_parallel_master:
9262 case OMPD_parallel_sections:
9263 case OMPD_for_simd:
9264 case OMPD_parallel_for_simd:
9265 case OMPD_cancel:
9266 case OMPD_cancellation_point:
9267 case OMPD_ordered:
9268 case OMPD_threadprivate:
9269 case OMPD_allocate:
9270 case OMPD_task:
9271 case OMPD_simd:
9272 case OMPD_tile:
9273 case OMPD_unroll:
9274 case OMPD_sections:
9275 case OMPD_section:
9276 case OMPD_single:
9277 case OMPD_master:
9278 case OMPD_critical:
9279 case OMPD_taskyield:
9280 case OMPD_barrier:
9281 case OMPD_taskwait:
9282 case OMPD_taskgroup:
9283 case OMPD_atomic:
9284 case OMPD_flush:
9285 case OMPD_depobj:
9286 case OMPD_scan:
9287 case OMPD_teams:
9288 case OMPD_target_data:
9289 case OMPD_target_exit_data:
9290 case OMPD_target_enter_data:
9291 case OMPD_distribute:
9292 case OMPD_distribute_simd:
9293 case OMPD_distribute_parallel_for:
9294 case OMPD_distribute_parallel_for_simd:
9295 case OMPD_teams_distribute:
9296 case OMPD_teams_distribute_simd:
9297 case OMPD_teams_distribute_parallel_for:
9298 case OMPD_teams_distribute_parallel_for_simd:
9299 case OMPD_target_update:
9300 case OMPD_declare_simd:
9301 case OMPD_declare_variant:
9302 case OMPD_begin_declare_variant:
9303 case OMPD_end_declare_variant:
9304 case OMPD_declare_target:
9305 case OMPD_end_declare_target:
9306 case OMPD_declare_reduction:
9307 case OMPD_declare_mapper:
9308 case OMPD_taskloop:
9309 case OMPD_taskloop_simd:
9310 case OMPD_master_taskloop:
9311 case OMPD_master_taskloop_simd:
9312 case OMPD_parallel_master_taskloop:
9313 case OMPD_parallel_master_taskloop_simd:
9314 case OMPD_requires:
9315 case OMPD_metadirective:
9316 case OMPD_unknown:
9317 default:
9318 llvm_unreachable("Unexpected directive.");
9319 }
9320 }
9321
9322 return nullptr;
9323 }
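
// For example, the inner 'distribute' is found through the 'teams' region in
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// while '#pragma omp target' over a plain '#pragma omp parallel for' has no
// nested distribute directive and yields nullptr.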
9324
9325 /// Emit the user-defined mapper function. The code generation follows the
9326 /// pattern in the example below.
9327 /// \code
9328 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9329 /// void *base, void *begin,
9330 /// int64_t size, int64_t type,
9331 /// void *name = nullptr) {
9332 /// // Allocate space for an array section first or add a base/begin for
9333 /// // pointer dereference.
9334 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9335 /// !maptype.IsDelete)
9336 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9337 /// size*sizeof(Ty), clearToFromMember(type));
9338 /// // Map members.
9339 /// for (unsigned i = 0; i < size; i++) {
9340 /// // For each component specified by this mapper:
9341 /// for (auto c : begin[i]->all_components) {
9342 /// if (c.hasMapper())
9343 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9344 /// c.arg_type, c.arg_name);
9345 /// else
9346 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9347 /// c.arg_begin, c.arg_size, c.arg_type,
9348 /// c.arg_name);
9349 /// }
9350 /// }
9351 /// // Delete the array section.
9352 /// if (size > 1 && maptype.IsDelete)
9353 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9354 /// size*sizeof(Ty), clearToFromMember(type));
9355 /// }
9356 /// \endcode
9357 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9358 CodeGenFunction *CGF) {
9359 if (UDMMap.count(D) > 0)
9360 return;
9361 ASTContext &C = CGM.getContext();
9362 QualType Ty = D->getType();
9363 auto *MapperVarDecl =
9364 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9365 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9366 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9367
9368 CodeGenFunction MapperCGF(CGM);
9369 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9370 auto PrivatizeAndGenMapInfoCB =
9371 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9372 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9373 MapperCGF.Builder.restoreIP(CodeGenIP);
9374
9375 // Privatize the declared variable of mapper to be the current array
9376 // element.
9377 Address PtrCurrent(
9378 PtrPHI, ElemTy,
9379 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9380 .getAlignment()
9381 .alignmentOfArrayElement(ElementSize));
9382 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9383 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9384 (void)Scope.Privatize();
9385
9386 // Get map clause information.
9387 MappableExprsHandler MEHandler(*D, MapperCGF);
9388 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9389
9390 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9391 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9392 };
9393 if (CGM.getCodeGenOpts().getDebugInfo() !=
9394 llvm::codegenoptions::NoDebugInfo) {
9395 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9396 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9397 FillInfoMap);
9398 }
9399
9400 return CombinedInfo;
9401 };
9402
9403 auto CustomMapperCB = [&](unsigned I) {
9404 llvm::Function *MapperFunc = nullptr;
9405 if (CombinedInfo.Mappers[I]) {
9406 // Call the corresponding mapper function.
9407 MapperFunc = getOrCreateUserDefinedMapperFunc(
9408 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9409 assert(MapperFunc && "Expect a valid mapper function is available.");
9410 }
9411 return MapperFunc;
9412 };
9413
9414 SmallString<64> TyStr;
9415 llvm::raw_svector_ostream Out(TyStr);
9416 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9417 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9418
9419 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9420 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9421 UDMMap.try_emplace(D, NewFn);
9422 if (CGF)
9423 FunctionUDMMap[CGF->CurFn].push_back(D);
9424 }
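
// A user-defined mapper that reaches this code path looks like:
//
//   struct Vec { int Len; double *Data; };
//   #pragma omp declare mapper(id : Vec V) map(V, V.Data[0 : V.Len])
//
// which produces a mapper function following the '.omp_mapper.<type>.<id>'
// naming pattern documented above, pushing one component for the struct and
// one for the array section.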
9425
9426 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9427 const OMPDeclareMapperDecl *D) {
9428 auto I = UDMMap.find(D);
9429 if (I != UDMMap.end())
9430 return I->second;
9431 emitUserDefinedMapper(D);
9432 return UDMMap.lookup(D);
9433 }
9434
9435 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9436 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9437 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9438 const OMPLoopDirective &D)>
9439 SizeEmitter) {
9440 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9441 const OMPExecutableDirective *TD = &D;
9442 // Get nested teams distribute kind directive, if any. For now, treat
9443 // 'target_teams_loop' as if it's really a target_teams_distribute.
9444 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9445 Kind != OMPD_target_teams_loop)
9446 TD = getNestedDistributeDirective(CGM.getContext(), D);
9447 if (!TD)
9448 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9449
9450 const auto *LD = cast<OMPLoopDirective>(TD);
9451 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9452 return NumIterations;
9453 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9454 }
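
// For example, for '#pragma omp target teams distribute' over
// 'for (int I = 0; I < N; ++I)', this returns the precomputed trip count N
// (widened to i64) as a kernel configuration hint for the runtime; the
// constant 0 means the trip count is unknown.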
9455
9456 static void
9457 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9458 const OMPExecutableDirective &D,
9459 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9460 bool RequiresOuterTask, const CapturedStmt &CS,
9461 bool OffloadingMandatory, CodeGenFunction &CGF) {
9462 if (OffloadingMandatory) {
9463 CGF.Builder.CreateUnreachable();
9464 } else {
9465 if (RequiresOuterTask) {
9466 CapturedVars.clear();
9467 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9468 }
9469 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9470 CapturedVars);
9471 }
9472 }
9473
9474 static llvm::Value *emitDeviceID(
9475 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9476 CodeGenFunction &CGF) {
9477 // Emit device ID if any.
9478 llvm::Value *DeviceID;
9479 if (Device.getPointer()) {
9480 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9481 Device.getInt() == OMPC_DEVICE_device_num) &&
9482 "Expected device_num modifier.");
9483 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9484 DeviceID =
9485 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9486 } else {
9487 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9488 }
9489 return DeviceID;
9490 }
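
// For example, '#pragma omp target device(2)' yields the i64 constant 2,
// while a directive without a device clause yields the OMP_DEVICEID_UNDEF
// sentinel so the runtime selects the default device.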
9491
9492 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9493 CodeGenFunction &CGF) {
9494 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9495
9496 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9497 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9498 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9499 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9500 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9501 /*isSigned=*/false);
9502 }
9503 return DynCGroupMem;
9504 }
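
// For example, '#pragma omp target ompx_dyn_cgroup_mem(N * 8)' evaluates the
// clause argument and passes it as an unsigned i32 dynamic group-memory size;
// without the clause the size defaults to 0.
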
9505 static void genMapInfoForCaptures(
9506 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9507 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9508 llvm::OpenMPIRBuilder &OMPBuilder,
9509 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9510 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9511
9512 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9513 auto RI = CS.getCapturedRecordDecl()->field_begin();
9514 auto *CV = CapturedVars.begin();
9515 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9516 CE = CS.capture_end();
9517 CI != CE; ++CI, ++RI, ++CV) {
9518 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9519
9520 // VLA sizes are passed to the outlined region by copy and do not have map
9521 // information associated.
9522 if (CI->capturesVariableArrayType()) {
9523 CurInfo.Exprs.push_back(nullptr);
9524 CurInfo.BasePointers.push_back(*CV);
9525 CurInfo.DevicePtrDecls.push_back(nullptr);
9526 CurInfo.DevicePointers.push_back(
9527 MappableExprsHandler::DeviceInfoTy::None);
9528 CurInfo.Pointers.push_back(*CV);
9529 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9530 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9531 // Copy to the device as an argument. No need to retrieve it.
9532 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9533 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9534 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9535 CurInfo.Mappers.push_back(nullptr);
9536 } else {
9537 // If we have any information in the map clause, we use it, otherwise we
9538 // just do a default mapping.
9539 MEHandler.generateInfoForCaptureFromClauseInfo(
9540 CI, *CV, CurInfo, OMPBuilder,
9541 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
9542
9543 if (!CI->capturesThis())
9544 MappedVarSet.insert(CI->getCapturedVar());
9545 else
9546 MappedVarSet.insert(nullptr);
9547
9548 if (CurInfo.BasePointers.empty())
9549 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9550
9551 // Generate correct mapping for variables captured by reference in
9552 // lambdas.
9553 if (CI->capturesVariable())
9554 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9555 CurInfo, LambdaPointers);
9556 }
9557 // We expect to have at least one element of information for this capture.
9558 assert(!CurInfo.BasePointers.empty() &&
9559 "Non-existing map pointer for capture!");
9560 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9561 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9562 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9563 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9564 "Inconsistent map information sizes!");
9565
9566 // We need to append the results of this capture to what we already have.
9567 CombinedInfo.append(CurInfo);
9568 }
9569 // Adjust MEMBER_OF flags for the lambda captures.
9570 MEHandler.adjustMemberOfForLambdaCaptures(
9571 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9572 CombinedInfo.Pointers, CombinedInfo.Types);
9573 }
9574 static void
9575 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9576 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9577 llvm::OpenMPIRBuilder &OMPBuilder,
9578 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9579 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9580
9581 CodeGenModule &CGM = CGF.CGM;
9582 // Map any list items in a map clause that were not captures because they
9583 // weren't referenced within the construct.
9584 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9585
9586 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9587 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9588 };
9589 if (CGM.getCodeGenOpts().getDebugInfo() !=
9590 llvm::codegenoptions::NoDebugInfo) {
9591 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9592 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9593 FillInfoMap);
9594 }
9595 }
9596
9597 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9598 const CapturedStmt &CS,
9599 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600 llvm::OpenMPIRBuilder &OMPBuilder,
9601 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9602 // Get mappable expression information.
9603 MappableExprsHandler MEHandler(D, CGF);
9604 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9605
9606 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9607 MappedVarSet, CombinedInfo);
9608 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9609 }
9610
9611 template <typename ClauseTy>
9612 static void
9613 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9614 const OMPExecutableDirective &D,
9615 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9616 const auto *C = D.getSingleClause<ClauseTy>();
9617 assert(!C->varlist_empty() &&
9618 "ompx_bare requires explicit num_teams and thread_limit");
9619 CodeGenFunction::RunCleanupsScope Scope(CGF);
9620 for (auto *E : C->varlist()) {
9621 llvm::Value *V = CGF.EmitScalarExpr(E);
9622 Values.push_back(
9623 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9624 }
9625 }
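
// A bare target directive that exercises this helper looks like:
//
//   #pragma omp target teams ompx_bare num_teams(64) thread_limit(128)
//
// where each list item of the clause becomes one i32 launch dimension
// (multi-dimensional forms, if accepted by Sema, append one value per item).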
9626
9627 static void emitTargetCallKernelLaunch(
9628 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9629 const OMPExecutableDirective &D,
9630 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9631 const CapturedStmt &CS, bool OffloadingMandatory,
9632 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9633 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9634 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9635 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9636 const OMPLoopDirective &D)>
9637 SizeEmitter,
9638 CodeGenFunction &CGF, CodeGenModule &CGM) {
9639 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9640
9641 // Fill up the arrays with all the captured variables.
9642 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9643 CGOpenMPRuntime::TargetDataInfo Info;
9644 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9645
9646 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9647 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9648
9649 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9650 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9651 CGF.VoidPtrTy, CGM.getPointerAlign());
9652 InputInfo.PointersArray =
9653 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9654 InputInfo.SizesArray =
9655 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9656 InputInfo.MappersArray =
9657 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9658 MapTypesArray = Info.RTArgs.MapTypesArray;
9659 MapNamesArray = Info.RTArgs.MapNamesArray;
9660
9661 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9662 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9663 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9664 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9665 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9666
9667 if (IsReverseOffloading) {
9668 // Reverse offloading is not supported, so just execute on the host.
9669 // FIXME: This fallback solution is incorrect since it ignores the
9670 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9671 // assert here and ensure SEMA emits an error.
9672 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9673 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9674 return;
9675 }
9676
9677 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9678 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9679
9680 llvm::Value *BasePointersArray =
9681 InputInfo.BasePointersArray.emitRawPointer(CGF);
9682 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9683 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9684 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9685
9686 auto &&EmitTargetCallFallbackCB =
9687 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9688 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9689 -> llvm::OpenMPIRBuilder::InsertPointTy {
9690 CGF.Builder.restoreIP(IP);
9691 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9692 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9693 return CGF.Builder.saveIP();
9694 };
9695
9696 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9697 SmallVector<llvm::Value *, 3> NumTeams;
9698 SmallVector<llvm::Value *, 3> NumThreads;
9699 if (IsBare) {
9700 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9701 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9702 NumThreads);
9703 } else {
9704 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9705 NumThreads.push_back(
9706 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9707 }
9708
9709 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9710 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9711 llvm::Value *NumIterations =
9712 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9713 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9714 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9715 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9716
9717 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9718 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9719 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9720
9721 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9722 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9723 DynCGGroupMem, HasNoWait);
9724
9725 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9726 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9727 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9728 RTLoc, AllocaIP));
9729 CGF.Builder.restoreIP(AfterIP);
9730 };
9731
9732 if (RequiresOuterTask)
9733 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9734 else
9735 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9736 }
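
// Conceptually, the emitted control flow follows this sketch:
//
//   if (__tgt_target_kernel(&Loc, DeviceID, NumTeams, NumThreads,
//                           OutlinedFnID, &KernelArgs) != 0)
//     HostFallback();   // EmitTargetCallFallbackCB above
//
// with the actual call sequence produced by OMPBuilder.emitKernelLaunch.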
9737
9738 static void
9739 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9740 const OMPExecutableDirective &D,
9741 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9742 bool RequiresOuterTask, const CapturedStmt &CS,
9743 bool OffloadingMandatory, CodeGenFunction &CGF) {
9744
9745 // Notify that the host version must be executed.
9746 auto &&ElseGen =
9747 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9748 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9749 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9750 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9751 };
9752
9753 if (RequiresOuterTask) {
9754 CodeGenFunction::OMPTargetDataInfo InputInfo;
9755 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9756 } else {
9757 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9758 }
9759 }
9760
9761 void CGOpenMPRuntime::emitTargetCall(
9762 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9763 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9764 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9765 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9766 const OMPLoopDirective &D)>
9767 SizeEmitter) {
9768 if (!CGF.HaveInsertPoint())
9769 return;
9770
9771 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9772 CGM.getLangOpts().OpenMPOffloadMandatory;
9773
9774 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9775
9776 const bool RequiresOuterTask =
9777 D.hasClausesOfKind<OMPDependClause>() ||
9778 D.hasClausesOfKind<OMPNowaitClause>() ||
9779 D.hasClausesOfKind<OMPInReductionClause>() ||
9780 (CGM.getLangOpts().OpenMP >= 51 &&
9781 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9782 D.hasClausesOfKind<OMPThreadLimitClause>());
9783 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9784 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9785 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9786 PrePostActionTy &) {
9787 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9788 };
9789 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9790
9791 CodeGenFunction::OMPTargetDataInfo InputInfo;
9792 llvm::Value *MapTypesArray = nullptr;
9793 llvm::Value *MapNamesArray = nullptr;
9794
9795 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9796 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9797 OutlinedFnID, &InputInfo, &MapTypesArray,
9798 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9799 PrePostActionTy &) {
9800 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9801 RequiresOuterTask, CS, OffloadingMandatory,
9802 Device, OutlinedFnID, InputInfo, MapTypesArray,
9803 MapNamesArray, SizeEmitter, CGF, CGM);
9804 };
9805
9806 auto &&TargetElseGen =
9807 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9808 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9809 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9810 CS, OffloadingMandatory, CGF);
9811 };
9812
9813 // If we have a target function ID it means that we need to support
9814 // offloading; otherwise, just execute on the host. We need to execute on
9815 // the host regardless of the 'if' clause condition if, e.g., the user does
9816 // not specify any target triples.
9817 if (OutlinedFnID) {
9818 if (IfCond) {
9819 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9820 } else {
9821 RegionCodeGenTy ThenRCG(TargetThenGen);
9822 ThenRCG(CGF);
9823 }
9824 } else {
9825 RegionCodeGenTy ElseRCG(TargetElseGen);
9826 ElseRCG(CGF);
9827 }
9828 }
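
// For instance, '#pragma omp target if(UseGPU) device(Dev)' with a valid
// outlined function ID lowers to
//
//   if (UseGPU) { /* kernel launch with host fallback */ }
//   else        { /* host fallback only */ }
//
// and when no offloading entry exists (e.g. no target triples were given),
// only the host path is emitted.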
9829
9830 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9831 StringRef ParentName) {
9832 if (!S)
9833 return;
9834
9835 // Codegen OMP target directives that offload compute to the device.
9836 bool RequiresDeviceCodegen =
9837 isa<OMPExecutableDirective>(S) &&
9838 isOpenMPTargetExecutionDirective(
9839 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9840
9841 if (RequiresDeviceCodegen) {
9842 const auto &E = *cast<OMPExecutableDirective>(S);
9843
9844 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9845 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9846
9847 // Is this a target region that should not be emitted as an entry point?
9848 // If so, just signal that we are done with this target region.
9849 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9850 return;
9851
9852 switch (E.getDirectiveKind()) {
9853 case OMPD_target:
9854 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9855 cast<OMPTargetDirective>(E));
9856 break;
9857 case OMPD_target_parallel:
9858 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9859 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9860 break;
9861 case OMPD_target_teams:
9862 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9863 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9864 break;
9865 case OMPD_target_teams_distribute:
9866 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9867 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9868 break;
9869 case OMPD_target_teams_distribute_simd:
9870 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9871 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9872 break;
9873 case OMPD_target_parallel_for:
9874 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9875 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9876 break;
9877 case OMPD_target_parallel_for_simd:
9878 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9879 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9880 break;
9881 case OMPD_target_simd:
9882 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9883 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9884 break;
9885 case OMPD_target_teams_distribute_parallel_for:
9886 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9887 CGM, ParentName,
9888 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9889 break;
9890 case OMPD_target_teams_distribute_parallel_for_simd:
9891 CodeGenFunction::
9892 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9893 CGM, ParentName,
9894 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9895 break;
9896 case OMPD_target_teams_loop:
9897 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9898 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9899 break;
9900 case OMPD_target_parallel_loop:
9901 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9902 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9903 break;
9904 case OMPD_parallel:
9905 case OMPD_for:
9906 case OMPD_parallel_for:
9907 case OMPD_parallel_master:
9908 case OMPD_parallel_sections:
9909 case OMPD_for_simd:
9910 case OMPD_parallel_for_simd:
9911 case OMPD_cancel:
9912 case OMPD_cancellation_point:
9913 case OMPD_ordered:
9914 case OMPD_threadprivate:
9915 case OMPD_allocate:
9916 case OMPD_task:
9917 case OMPD_simd:
9918 case OMPD_tile:
9919 case OMPD_unroll:
9920 case OMPD_sections:
9921 case OMPD_section:
9922 case OMPD_single:
9923 case OMPD_master:
9924 case OMPD_critical:
9925 case OMPD_taskyield:
9926 case OMPD_barrier:
9927 case OMPD_taskwait:
9928 case OMPD_taskgroup:
9929 case OMPD_atomic:
9930 case OMPD_flush:
9931 case OMPD_depobj:
9932 case OMPD_scan:
9933 case OMPD_teams:
9934 case OMPD_target_data:
9935 case OMPD_target_exit_data:
9936 case OMPD_target_enter_data:
9937 case OMPD_distribute:
9938 case OMPD_distribute_simd:
9939 case OMPD_distribute_parallel_for:
9940 case OMPD_distribute_parallel_for_simd:
9941 case OMPD_teams_distribute:
9942 case OMPD_teams_distribute_simd:
9943 case OMPD_teams_distribute_parallel_for:
9944 case OMPD_teams_distribute_parallel_for_simd:
9945 case OMPD_target_update:
9946 case OMPD_declare_simd:
9947 case OMPD_declare_variant:
9948 case OMPD_begin_declare_variant:
9949 case OMPD_end_declare_variant:
9950 case OMPD_declare_target:
9951 case OMPD_end_declare_target:
9952 case OMPD_declare_reduction:
9953 case OMPD_declare_mapper:
9954 case OMPD_taskloop:
9955 case OMPD_taskloop_simd:
9956 case OMPD_master_taskloop:
9957 case OMPD_master_taskloop_simd:
9958 case OMPD_parallel_master_taskloop:
9959 case OMPD_parallel_master_taskloop_simd:
9960 case OMPD_requires:
9961 case OMPD_metadirective:
9962 case OMPD_unknown:
9963 default:
9964 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9965 }
9966 return;
9967 }
9968
9969 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9970 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9971 return;
9972
9973 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9974 return;
9975 }
9976
9977 // If this is a lambda function, look into its body.
9978 if (const auto *L = dyn_cast<LambdaExpr>(S))
9979 S = L->getBody();
9980
9981 // Keep looking for target regions recursively.
9982 for (const Stmt *II : S->children())
9983 scanForTargetRegionsFunctions(II, ParentName);
9984 }
9985
9986 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9987 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9988 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9989 if (!DevTy)
9990 return false;
9991 // Do not emit device_type(nohost) functions for the host.
9992 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9993 return true;
9994 // Do not emit device_type(host) functions for the device.
9995 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9996 return true;
9997 return false;
9998 }
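
// For example:
//
//   #pragma omp declare target device_type(nohost)
//   void GPUOnly();   // not emitted in the host compilation
//   #pragma omp end declare target
//
// and, symmetrically, device_type(host) functions are not emitted when
// compiling for the device.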
9999
10000 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10001 // If emitting code for the host, we do not process FD here. Instead we do
10002 // the normal code generation.
10003 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10004 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10005 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10006 CGM.getLangOpts().OpenMPIsTargetDevice))
10007 return true;
10008 return false;
10009 }
10010
10011 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10012 // Try to detect target regions in the function.
10013 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10014 StringRef Name = CGM.getMangledName(GD);
10015 scanForTargetRegionsFunctions(FD->getBody(), Name);
10016 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10017 CGM.getLangOpts().OpenMPIsTargetDevice))
10018 return true;
10019 }
10020
10021 // Do not emit the function if it is not marked as declare target.
10022 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10023 AlreadyEmittedTargetDecls.count(VD) == 0;
10024 }
10025
10026 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10027 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10028 CGM.getLangOpts().OpenMPIsTargetDevice))
10029 return true;
10030
10031 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10032 return false;
10033
10034 // Check if there are Ctors/Dtors in this declaration and look for target
10035 // regions in it. We use the complete variant to produce the kernel name
10036 // mangling.
10037 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10038 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10039 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10040 StringRef ParentName =
10041 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10042 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10043 }
10044 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10045 StringRef ParentName =
10046 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10047 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10048 }
10049 }
10050
10051 // Do not emit the variable if it is not marked as declare target.
10052 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10053 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10054 cast<VarDecl>(GD.getDecl()));
10055 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10056 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10057 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10058 HasRequiresUnifiedSharedMemory)) {
10059 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10060 return true;
10061 }
10062 return false;
10063 }
10064
10065 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10066 llvm::Constant *Addr) {
10067 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10068 !CGM.getLangOpts().OpenMPIsTargetDevice)
10069 return;
10070
10071 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10072 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10073
10074 // If this is an 'extern' declaration we defer to the canonical definition and
10075 // do not emit an offloading entry.
10076 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10077 VD->hasExternalStorage())
10078 return;
10079
10080 if (!Res) {
10081 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10082 // Register non-target variables being emitted in device code (debug info
10083 // may cause this).
10084 StringRef VarName = CGM.getMangledName(VD);
10085 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10086 }
10087 return;
10088 }
10089
10090 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10091 auto LinkageForVariable = [&VD, this]() {
10092 return CGM.getLLVMLinkageVarDefinition(VD);
10093 };
10094
10095 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10096 OMPBuilder.registerTargetGlobalVariable(
10097 convertCaptureClause(VD), convertDeviceClause(VD),
10098 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10099 VD->isExternallyVisible(),
10100 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10101 VD->getCanonicalDecl()->getBeginLoc()),
10102 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10103 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10104 CGM.getTypes().ConvertTypeForMem(
10105 CGM.getContext().getPointerType(VD->getType())),
10106 Addr);
10107
10108 for (auto *ref : GeneratedRefs)
10109 CGM.addCompilerUsedGlobal(ref);
10110 }
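
// For example:
//
//   int X;
//   #pragma omp declare target link(X)
//
// registers an offloading entry for 'X' so the device accesses it through a
// generated reference pointer, whereas a plain global reaching this point in
// device code is only recorded in EmittedNonTargetVariables.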
10111
10112 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10113 if (isa<FunctionDecl>(GD.getDecl()) ||
10114 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10115 return emitTargetFunctions(GD);
10116
10117 return emitTargetGlobalVariable(GD);
10118 }
10119
10120 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10121 for (const VarDecl *VD : DeferredGlobalVariables) {
10122 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10123 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10124 if (!Res)
10125 continue;
10126 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10127 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10128 !HasRequiresUnifiedSharedMemory) {
10129 CGM.EmitGlobal(VD);
10130 } else {
10131 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10132 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10133 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10134 HasRequiresUnifiedSharedMemory)) &&
10135 "Expected link clause or to clause with unified memory.");
10136 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10137 }
10138 }
10139 }
10140
10141 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10142 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10143 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10144 " Expected target-based directive.");
10145 }
10146
10147 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10148 for (const OMPClause *Clause : D->clauselists()) {
10149 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10150 HasRequiresUnifiedSharedMemory = true;
10151 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10152 } else if (const auto *AC =
10153 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10154 switch (AC->getAtomicDefaultMemOrderKind()) {
10155 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10156 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10157 break;
10158 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10159 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10160 break;
10161 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10162 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10163 break;
10164 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10165 break;
10166 }
10167 }
10168 }
10169 }
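
// For example, a translation unit containing
//
//   #pragma omp requires atomic_default_mem_order(seq_cst)
//
// makes atomic constructs without an explicit memory-order clause use
// llvm::AtomicOrdering::SequentiallyConsistent, as reported by
// getDefaultMemoryOrdering() below.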
10170
10171 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10172 return RequiresAtomicOrdering;
10173 }
10174
10175 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10176 LangAS &AS) {
10177 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10178 return false;
10179 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10180 switch (A->getAllocatorType()) {
10181 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10182 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10183 // Not supported, fall back to the default memory space.
10184 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10185 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10186 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10187 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10188 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10189 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10190 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10191 AS = LangAS::Default;
10192 return true;
10193 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10194 llvm_unreachable("Expected predefined allocator for the variables with the "
10195 "static storage.");
10196 }
10197 return false;
10198 }
10199
10200 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10201 return HasRequiresUnifiedSharedMemory;
10202 }
10203
10204 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10205 CodeGenModule &CGM)
10206 : CGM(CGM) {
10207 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10208 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10209 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10210 }
10211 }
10212
10213 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10214 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10215 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10216 }
10217
10218 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10219 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10220 return true;
10221
10222 const auto *D = cast<FunctionDecl>(GD.getDecl());
10223 // Do not emit the function if it is marked as declare target, as it was
10224 // already emitted.
10225 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10226 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10227 if (auto *F = dyn_cast_or_null<llvm::Function>(
10228 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10229 return !F->isDeclaration();
10230 return false;
10231 }
10232 return true;
10233 }
10234
10235 return !AlreadyEmittedTargetDecls.insert(D).second;
10236 }
10237
10238 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10239 const OMPExecutableDirective &D,
10240 SourceLocation Loc,
10241 llvm::Function *OutlinedFn,
10242 ArrayRef<llvm::Value *> CapturedVars) {
10243 if (!CGF.HaveInsertPoint())
10244 return;
10245
10246 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10247 CodeGenFunction::RunCleanupsScope Scope(CGF);
10248
10249 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10250 llvm::Value *Args[] = {
10251 RTLoc,
10252 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10253 OutlinedFn};
10254 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10255 RealArgs.append(std::begin(Args), std::end(Args));
10256 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10257
10258 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10259 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10260 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10261 }
10262
10263 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10264 const Expr *NumTeams,
10265 const Expr *ThreadLimit,
10266 SourceLocation Loc) {
10267 if (!CGF.HaveInsertPoint())
10268 return;
10269
10270 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10271
10272 llvm::Value *NumTeamsVal =
10273 NumTeams
10274 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10275 CGF.CGM.Int32Ty, /* isSigned = */ true)
10276 : CGF.Builder.getInt32(0);
10277
10278 llvm::Value *ThreadLimitVal =
10279 ThreadLimit
10280 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10281 CGF.CGM.Int32Ty, /* isSigned = */ true)
10282 : CGF.Builder.getInt32(0);
10283
10284 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10285 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10286 ThreadLimitVal};
10287 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10288 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10289 PushNumTeamsArgs);
10290 }
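
// For example, '#pragma omp teams num_teams(4) thread_limit(64)' lowers to
//
//   __kmpc_push_num_teams(&loc, global_tid, 4, 64);
//
// ahead of the __kmpc_fork_teams call, with 0 meaning "no value specified".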
10291
10292 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10293 const Expr *ThreadLimit,
10294 SourceLocation Loc) {
10295 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10296 llvm::Value *ThreadLimitVal =
10297 ThreadLimit
10298 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10299 CGF.CGM.Int32Ty, /* isSigned = */ true)
10300 : CGF.Builder.getInt32(0);
10301
10302 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10303 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10304 ThreadLimitVal};
10305 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10306 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10307 ThreadLimitArgs);
10308 }
10309
10310 void CGOpenMPRuntime::emitTargetDataCalls(
10311 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10312 const Expr *Device, const RegionCodeGenTy &CodeGen,
10313 CGOpenMPRuntime::TargetDataInfo &Info) {
10314 if (!CGF.HaveInsertPoint())
10315 return;
10316
10317 // Action used to replace the default codegen action and turn privatization
10318 // off.
10319 PrePostActionTy NoPrivAction;
10320
10321 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10322
10323 llvm::Value *IfCondVal = nullptr;
10324 if (IfCond)
10325 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10326
10327 // Emit device ID if any.
10328 llvm::Value *DeviceID = nullptr;
10329 if (Device) {
10330 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10331 CGF.Int64Ty, /*isSigned=*/true);
10332 } else {
10333 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10334 }
10335
10336 // Fill up the arrays with all the mapped variables.
10337 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10338 auto GenMapInfoCB =
10339 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10340 CGF.Builder.restoreIP(CodeGenIP);
10341 // Get map clause information.
10342 MappableExprsHandler MEHandler(D, CGF);
10343 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10344
10345 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10346 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10347 };
10348 if (CGM.getCodeGenOpts().getDebugInfo() !=
10349 llvm::codegenoptions::NoDebugInfo) {
10350 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10351 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10352 FillInfoMap);
10353 }
10354
10355 return CombinedInfo;
10356 };
10357 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10358 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10359 CGF.Builder.restoreIP(CodeGenIP);
10360 switch (BodyGenType) {
10361 case BodyGenTy::Priv:
10362 if (!Info.CaptureDeviceAddrMap.empty())
10363 CodeGen(CGF);
10364 break;
10365 case BodyGenTy::DupNoPriv:
10366 if (!Info.CaptureDeviceAddrMap.empty()) {
10367 CodeGen.setAction(NoPrivAction);
10368 CodeGen(CGF);
10369 }
10370 break;
10371 case BodyGenTy::NoPriv:
10372 if (Info.CaptureDeviceAddrMap.empty()) {
10373 CodeGen.setAction(NoPrivAction);
10374 CodeGen(CGF);
10375 }
10376 break;
10377 }
10378 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10379 CGF.Builder.GetInsertPoint());
10380 };
10381
10382 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10383 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10384 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10385 }
10386 };
10387
10388 auto CustomMapperCB = [&](unsigned int I) {
10389 llvm::Function *MFunc = nullptr;
10390 if (CombinedInfo.Mappers[I]) {
10391 Info.HasMapper = true;
10392 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10393 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10394 }
10395 return MFunc;
10396 };
10397
10398 // Source location for the ident struct
10399 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10400
10401 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10402 CGF.AllocaInsertPt->getIterator());
10403 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10404 CGF.Builder.GetInsertPoint());
10405 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10406 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10407 cantFail(OMPBuilder.createTargetData(
10408 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10409 CustomMapperCB,
10410 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10411 CGF.Builder.restoreIP(AfterIP);
10412 }
10413
10414 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10415 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10416 const Expr *Device) {
10417 if (!CGF.HaveInsertPoint())
10418 return;
10419
10420 assert((isa<OMPTargetEnterDataDirective>(D) ||
10421 isa<OMPTargetExitDataDirective>(D) ||
10422 isa<OMPTargetUpdateDirective>(D)) &&
10423 "Expecting either target enter, exit data, or update directives.");
10424
10425 CodeGenFunction::OMPTargetDataInfo InputInfo;
10426 llvm::Value *MapTypesArray = nullptr;
10427 llvm::Value *MapNamesArray = nullptr;
10428 // Generate the code for the opening of the data environment.
10429 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10430 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10431 // Emit device ID if any.
10432 llvm::Value *DeviceID = nullptr;
10433 if (Device) {
10434 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10435 CGF.Int64Ty, /*isSigned=*/true);
10436 } else {
10437 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10438 }
10439
10440 // Emit the number of elements in the offloading arrays.
10441 llvm::Constant *PointerNum =
10442 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10443
10444 // Source location for the ident struct
10445 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10446
10447 SmallVector<llvm::Value *, 13> OffloadingArgs(
10448 {RTLoc, DeviceID, PointerNum,
10449 InputInfo.BasePointersArray.emitRawPointer(CGF),
10450 InputInfo.PointersArray.emitRawPointer(CGF),
10451 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10452 InputInfo.MappersArray.emitRawPointer(CGF)});
10453
10454 // Select the right runtime function call for each standalone
10455 // directive.
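    // For example, '#pragma omp target update to(x) nowait' selects
    // __tgt_target_data_update_nowait_mapper.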
10456 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10457 RuntimeFunction RTLFn;
10458 switch (D.getDirectiveKind()) {
10459 case OMPD_target_enter_data:
10460 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10461 : OMPRTL___tgt_target_data_begin_mapper;
10462 break;
10463 case OMPD_target_exit_data:
10464 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10465 : OMPRTL___tgt_target_data_end_mapper;
10466 break;
10467 case OMPD_target_update:
10468 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10469 : OMPRTL___tgt_target_data_update_mapper;
10470 break;
10471 case OMPD_parallel:
10472 case OMPD_for:
10473 case OMPD_parallel_for:
10474 case OMPD_parallel_master:
10475 case OMPD_parallel_sections:
10476 case OMPD_for_simd:
10477 case OMPD_parallel_for_simd:
10478 case OMPD_cancel:
10479 case OMPD_cancellation_point:
10480 case OMPD_ordered:
10481 case OMPD_threadprivate:
10482 case OMPD_allocate:
10483 case OMPD_task:
10484 case OMPD_simd:
10485 case OMPD_tile:
10486 case OMPD_unroll:
10487 case OMPD_sections:
10488 case OMPD_section:
10489 case OMPD_single:
10490 case OMPD_master:
10491 case OMPD_critical:
10492 case OMPD_taskyield:
10493 case OMPD_barrier:
10494 case OMPD_taskwait:
10495 case OMPD_taskgroup:
10496 case OMPD_atomic:
10497 case OMPD_flush:
10498 case OMPD_depobj:
10499 case OMPD_scan:
10500 case OMPD_teams:
10501 case OMPD_target_data:
10502 case OMPD_distribute:
10503 case OMPD_distribute_simd:
10504 case OMPD_distribute_parallel_for:
10505 case OMPD_distribute_parallel_for_simd:
10506 case OMPD_teams_distribute:
10507 case OMPD_teams_distribute_simd:
10508 case OMPD_teams_distribute_parallel_for:
10509 case OMPD_teams_distribute_parallel_for_simd:
10510 case OMPD_declare_simd:
10511 case OMPD_declare_variant:
10512 case OMPD_begin_declare_variant:
10513 case OMPD_end_declare_variant:
10514 case OMPD_declare_target:
10515 case OMPD_end_declare_target:
10516 case OMPD_declare_reduction:
10517 case OMPD_declare_mapper:
10518 case OMPD_taskloop:
10519 case OMPD_taskloop_simd:
10520 case OMPD_master_taskloop:
10521 case OMPD_master_taskloop_simd:
10522 case OMPD_parallel_master_taskloop:
10523 case OMPD_parallel_master_taskloop_simd:
10524 case OMPD_target:
10525 case OMPD_target_simd:
10526 case OMPD_target_teams_distribute:
10527 case OMPD_target_teams_distribute_simd:
10528 case OMPD_target_teams_distribute_parallel_for:
10529 case OMPD_target_teams_distribute_parallel_for_simd:
10530 case OMPD_target_teams:
10531 case OMPD_target_parallel:
10532 case OMPD_target_parallel_for:
10533 case OMPD_target_parallel_for_simd:
10534 case OMPD_requires:
10535 case OMPD_metadirective:
10536 case OMPD_unknown:
10537 default:
10538 llvm_unreachable("Unexpected standalone target data directive.");
10539 break;
10540 }
10541 if (HasNowait) {
10542 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10543 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10544 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10545 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10546 }
10547 CGF.EmitRuntimeCall(
10548 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10549 OffloadingArgs);
10550 };
10551
10552 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10553 &MapNamesArray](CodeGenFunction &CGF,
10554 PrePostActionTy &) {
10555 // Fill up the arrays with all the mapped variables.
10556 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10557 CGOpenMPRuntime::TargetDataInfo Info;
10558 MappableExprsHandler MEHandler(D, CGF);
10559 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10560 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10561 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10562
10563 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10564 D.hasClausesOfKind<OMPNowaitClause>();
10565
10566 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10567 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10568 CGF.VoidPtrTy, CGM.getPointerAlign());
10569 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10570 CGM.getPointerAlign());
10571 InputInfo.SizesArray =
10572 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10573 InputInfo.MappersArray =
10574 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10575 MapTypesArray = Info.RTArgs.MapTypesArray;
10576 MapNamesArray = Info.RTArgs.MapNamesArray;
10577 if (RequiresOuterTask)
10578 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10579 else
10580 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10581 };
10582
10583 if (IfCond) {
10584 emitIfClause(CGF, IfCond, TargetThenGen,
10585 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10586 } else {
10587 RegionCodeGenTy ThenRCG(TargetThenGen);
10588 ThenRCG(CGF);
10589 }
10590 }
10591
10592 namespace {
10593 /// Kind of parameter in a function with 'declare simd' directive.
10594 enum ParamKindTy {
10595 Linear,
10596 LinearRef,
10597 LinearUVal,
10598 LinearVal,
10599 Uniform,
10600 Vector,
10601 };
10602 /// Attribute set of the parameter.
10603 struct ParamAttrTy {
10604 ParamKindTy Kind = Vector;
10605 llvm::APSInt StrideOrArg;
10606 llvm::APSInt Alignment;
10607 bool HasVarStride = false;
10608 };
10609 } // namespace
10610
10611 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10612 ArrayRef<ParamAttrTy> ParamAttrs) {
10613 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10614 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10615 // of that clause. The VLEN value must be a power of 2.
10616 // Otherwise the notion of the function's "characteristic data type" (CDT)
10617 // is used to compute the vector length.
10618 // CDT is defined in the following order:
10619 // a) For non-void function, the CDT is the return type.
10620 // b) If the function has any non-uniform, non-linear parameters, then the
10621 // CDT is the type of the first such parameter.
10622 // c) If the CDT determined by a) or b) above is struct, union, or class
10623 // type which is pass-by-value (except for the type that maps to the
10624 // built-in complex data type), the characteristic data type is int.
10625 // d) If none of the above three cases is applicable, the CDT is int.
10626 // The VLEN is then determined based on the CDT and the size of vector
10627 // register of that ISA for which current vector version is generated. The
10628 // VLEN is computed using the formula below:
10629 // VLEN = sizeof(vector_register) / sizeof(CDT),
10630 // where the vector register size is specified in section 3.2.1 "Registers
10631 // and the Stack Frame" of the original AMD64 ABI document.
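// For example, for 'double foo(double x)' with no simdlen clause, the CDT is
// 'double' (case a), so a 256-bit (AVX) variant gets VLEN = 256 / 64 = 4.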
10632 QualType RetType = FD->getReturnType();
10633 if (RetType.isNull())
10634 return 0;
10635 ASTContext &C = FD->getASTContext();
10636 QualType CDT;
10637 if (!RetType.isNull() && !RetType->isVoidType()) {
10638 CDT = RetType;
10639 } else {
10640 unsigned Offset = 0;
10641 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10642 if (ParamAttrs[Offset].Kind == Vector)
10643 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10644 ++Offset;
10645 }
10646 if (CDT.isNull()) {
10647 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10648 if (ParamAttrs[I + Offset].Kind == Vector) {
10649 CDT = FD->getParamDecl(I)->getType();
10650 break;
10651 }
10652 }
10653 }
10654 }
10655 if (CDT.isNull())
10656 CDT = C.IntTy;
10657 CDT = CDT->getCanonicalTypeUnqualified();
10658 if (CDT->isRecordType() || CDT->isUnionType())
10659 CDT = C.IntTy;
10660 return C.getTypeSize(CDT);
10661 }
10662
10663 /// Mangle the parameter part of the vector function name according to
10664 /// their OpenMP classification. The mangling function is defined in
10665 /// section 4.5 of the AAVFABI(2021Q1).
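/// For example, '#pragma omp declare simd linear(i:2) uniform(n)
/// aligned(p:16)' on 'void foo(int i, int n, double *p)' mangles the
/// parameter part as "l2uva16".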
10666 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10667 SmallString<256> Buffer;
10668 llvm::raw_svector_ostream Out(Buffer);
10669 for (const auto &ParamAttr : ParamAttrs) {
10670 switch (ParamAttr.Kind) {
10671 case Linear:
10672 Out << 'l';
10673 break;
10674 case LinearRef:
10675 Out << 'R';
10676 break;
10677 case LinearUVal:
10678 Out << 'U';
10679 break;
10680 case LinearVal:
10681 Out << 'L';
10682 break;
10683 case Uniform:
10684 Out << 'u';
10685 break;
10686 case Vector:
10687 Out << 'v';
10688 break;
10689 }
10690 if (ParamAttr.HasVarStride)
10691 Out << "s" << ParamAttr.StrideOrArg;
10692 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10693 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10694 // Don't print the step value if it is not present or if it is
10695 // equal to 1.
10696 if (ParamAttr.StrideOrArg < 0)
10697 Out << 'n' << -ParamAttr.StrideOrArg;
10698 else if (ParamAttr.StrideOrArg != 1)
10699 Out << ParamAttr.StrideOrArg;
10700 }
10701
10702 if (!!ParamAttr.Alignment)
10703 Out << 'a' << ParamAttr.Alignment;
10704 }
10705
10706 return std::string(Out.str());
10707 }
10708
10709 static void
10710 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10711 const llvm::APSInt &VLENVal,
10712 ArrayRef<ParamAttrTy> ParamAttrs,
10713 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10714 struct ISADataTy {
10715 char ISA;
10716 unsigned VecRegSize;
10717 };
10718 ISADataTy ISAData[] = {
10719 {
10720 'b', 128
10721 }, // SSE
10722 {
10723 'c', 256
10724 }, // AVX
10725 {
10726 'd', 256
10727 }, // AVX2
10728 {
10729 'e', 512
10730 }, // AVX512
10731 };
10732 llvm::SmallVector<char, 2> Masked;
10733 switch (State) {
10734 case OMPDeclareSimdDeclAttr::BS_Undefined:
10735 Masked.push_back('N');
10736 Masked.push_back('M');
10737 break;
10738 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10739 Masked.push_back('N');
10740 break;
10741 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10742 Masked.push_back('M');
10743 break;
10744 }
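  // For example, 'double foo(double x)' with 'notinbranch' and no simdlen
  // gets, among others, "_ZGVbN2v_foo" (SSE: 128 / 64 = 2 lanes) and
  // "_ZGVeN8v_foo" (AVX512: 512 / 64 = 8 lanes).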
10745 for (char Mask : Masked) {
10746 for (const ISADataTy &Data : ISAData) {
10747 SmallString<256> Buffer;
10748 llvm::raw_svector_ostream Out(Buffer);
10749 Out << "_ZGV" << Data.ISA << Mask;
10750 if (!VLENVal) {
10751 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10752 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10753 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10754 } else {
10755 Out << VLENVal;
10756 }
10757 Out << mangleVectorParameters(ParamAttrs);
10758 Out << '_' << Fn->getName();
10759 Fn->addFnAttr(Out.str());
10760 }
10761 }
10762 }
10763
10764 // These are the functions needed to mangle the names of the
10765 // vector functions generated by the compiler, according to the rules
10766 // defined in the "Vector Function ABI specifications for AArch64",
10767 // available at
10768 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10769
10770 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10771 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10772 QT = QT.getCanonicalType();
10773
10774 if (QT->isVoidType())
10775 return false;
10776
10777 if (Kind == ParamKindTy::Uniform)
10778 return false;
10779
10780 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10781 return false;
10782
10783 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10784 !QT->isReferenceType())
10785 return false;
10786
10787 return true;
10788 }
10789
10790 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10791 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10792 QT = QT.getCanonicalType();
10793 unsigned Size = C.getTypeSize(QT);
10794
10795 // Only scalars and complex types up to 16 bytes wide set PBV to true.
10796 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10797 return false;
10798
10799 if (QT->isFloatingType())
10800 return true;
10801
10802 if (QT->isIntegerType())
10803 return true;
10804
10805 if (QT->isPointerType())
10806 return true;
10807
10808 // TODO: Add support for complex types (section 3.1.2, item 2).
10809
10810 return false;
10811 }
10812
10813 /// Computes the lane size (LS) of a return type or of an input parameter,
10814 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10815 /// TODO: Add support for references, section 3.2.1, item 1.
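/// For example, LS('double') == 64, and for a uniform 'int *' parameter the
/// LS is that of the pointee, i.e. 32.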
10816 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10817 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10818 QualType PTy = QT.getCanonicalType()->getPointeeType();
10819 if (getAArch64PBV(PTy, C))
10820 return C.getTypeSize(PTy);
10821 }
10822 if (getAArch64PBV(QT, C))
10823 return C.getTypeSize(QT);
10824
10825 return C.getTypeSize(C.getUIntPtrType());
10826 }
10827
10828 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10829 // signature of the scalar function, as defined in 3.2.2 of the
10830 // AAVFABI.
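// For example, for 'double foo(float x)' the lane sizes are {64, 32}, so
// NDS == 32 and WDS == 64.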
10831 static std::tuple<unsigned, unsigned, bool>
10832 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10833 QualType RetType = FD->getReturnType().getCanonicalType();
10834
10835 ASTContext &C = FD->getASTContext();
10836
10837 bool OutputBecomesInput = false;
10838
10839 llvm::SmallVector<unsigned, 8> Sizes;
10840 if (!RetType->isVoidType()) {
10841 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10842 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10843 OutputBecomesInput = true;
10844 }
10845 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10846 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10847 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10848 }
10849
10850 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10851 // The LS of a function parameter / return value can only be a power
10852 // of 2, starting from 8 bits, up to 128.
10853 assert(llvm::all_of(Sizes,
10854 [](unsigned Size) {
10855 return Size == 8 || Size == 16 || Size == 32 ||
10856 Size == 64 || Size == 128;
10857 }) &&
10858 "Invalid size");
10859
10860 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
10861 OutputBecomesInput);
10862 }
10863
10864 // Function used to add the attribute. The parameter `VLEN` is
10865 // templated to allow the use of "x" when targeting scalable functions
10866 // for SVE.
10867 template <typename T>
10868 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10869 char ISA, StringRef ParSeq,
10870 StringRef MangledName, bool OutputBecomesInput,
10871 llvm::Function *Fn) {
10872 SmallString<256> Buffer;
10873 llvm::raw_svector_ostream Out(Buffer);
10874 Out << Prefix << ISA << LMask << VLEN;
10875 if (OutputBecomesInput)
10876 Out << "v";
10877 Out << ParSeq << "_" << MangledName;
10878 Fn->addFnAttr(Out.str());
10879 }
10880
10881 // Helper function to generate the Advanced SIMD names depending on
10882 // the value of the NDS when simdlen is not present.
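// For example, NDS == 32 emits both a 2-lane (64-bit) and a 4-lane (128-bit)
// Advanced SIMD variant.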
10883 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10884 StringRef Prefix, char ISA,
10885 StringRef ParSeq, StringRef MangledName,
10886 bool OutputBecomesInput,
10887 llvm::Function *Fn) {
10888 switch (NDS) {
10889 case 8:
10890 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10891 OutputBecomesInput, Fn);
10892 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10893 OutputBecomesInput, Fn);
10894 break;
10895 case 16:
10896 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10897 OutputBecomesInput, Fn);
10898 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10899 OutputBecomesInput, Fn);
10900 break;
10901 case 32:
10902 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10903 OutputBecomesInput, Fn);
10904 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10905 OutputBecomesInput, Fn);
10906 break;
10907 case 64:
10908 case 128:
10909 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10910 OutputBecomesInput, Fn);
10911 break;
10912 default:
10913 llvm_unreachable("Scalar type is too wide.");
10914 }
10915 }
10916
10917 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10918 static void emitAArch64DeclareSimdFunction(
10919 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10920 ArrayRef<ParamAttrTy> ParamAttrs,
10921 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10922 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10923
10924 // Get basic data for building the vector signature.
10925 const auto Data = getNDSWDS(FD, ParamAttrs);
10926 const unsigned NDS = std::get<0>(Data);
10927 const unsigned WDS = std::get<1>(Data);
10928 const bool OutputBecomesInput = std::get<2>(Data);
10929
10930 // Check the values provided via `simdlen` by the user.
10931 // 1. A `simdlen(1)` doesn't produce vector signatures.
10932 if (UserVLEN == 1) {
10933 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10934 DiagnosticsEngine::Warning,
10935 "The clause simdlen(1) has no effect when targeting aarch64.");
10936 CGM.getDiags().Report(SLoc, DiagID);
10937 return;
10938 }
10939
10940 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10941 // Advanced SIMD output.
10942 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10943 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10944 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10945 "power of 2 when targeting Advanced SIMD.");
10946 CGM.getDiags().Report(SLoc, DiagID);
10947 return;
10948 }
10949
10950 // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
10951 // limits.
10952 if (ISA == 's' && UserVLEN != 0) {
10953 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10954 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10955 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10956 "lanes in the architectural constraints "
10957 "for SVE (min is 128-bit, max is "
10958 "2048-bit, by steps of 128-bit)");
10959 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10960 return;
10961 }
10962 }
10963
10964 // Sort out parameter sequence.
10965 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10966 StringRef Prefix = "_ZGV";
10967 // Generate simdlen from user input (if any).
10968 if (UserVLEN) {
10969 if (ISA == 's') {
10970 // SVE generates only a masked function.
10971 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10972 OutputBecomesInput, Fn);
10973 } else {
10974 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10975 // Advanced SIMD generates one or two functions, depending on
10976 // the `[not]inbranch` clause.
10977 switch (State) {
10978 case OMPDeclareSimdDeclAttr::BS_Undefined:
10979 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10980 OutputBecomesInput, Fn);
10981 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10982 OutputBecomesInput, Fn);
10983 break;
10984 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10985 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10986 OutputBecomesInput, Fn);
10987 break;
10988 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10989 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10990 OutputBecomesInput, Fn);
10991 break;
10992 }
10993 }
10994 } else {
10995 // If no user simdlen is provided, follow the AAVFABI rules for
10996 // generating the vector length.
10997 if (ISA == 's') {
10998 // SVE, section 3.4.1, item 1.
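      // This produces a scalable masked variant such as "_ZGVsMxv_foo" for a
      // one-vector-parameter 'foo'.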
10999 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11000 OutputBecomesInput, Fn);
11001 } else {
11002 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11003 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11004 // two vector names depending on the use of the clause
11005 // `[not]inbranch`.
11006 switch (State) {
11007 case OMPDeclareSimdDeclAttr::BS_Undefined:
11008 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11009 OutputBecomesInput, Fn);
11010 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11011 OutputBecomesInput, Fn);
11012 break;
11013 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11014 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11015 OutputBecomesInput, Fn);
11016 break;
11017 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11018 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11019 OutputBecomesInput, Fn);
11020 break;
11021 }
11022 }
11023 }
11024 }
11025
11026 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11027 llvm::Function *Fn) {
11028 ASTContext &C = CGM.getContext();
11029 FD = FD->getMostRecentDecl();
11030 while (FD) {
11031 // Map params to their positions in function decl.
11032 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11033 if (isa<CXXMethodDecl>(FD))
11034 ParamPositions.try_emplace(FD, 0);
11035 unsigned ParamPos = ParamPositions.size();
11036 for (const ParmVarDecl *P : FD->parameters()) {
11037 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11038 ++ParamPos;
11039 }
11040 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11041 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11042 // Mark uniform parameters.
11043 for (const Expr *E : Attr->uniforms()) {
11044 E = E->IgnoreParenImpCasts();
11045 unsigned Pos;
11046 if (isa<CXXThisExpr>(E)) {
11047 Pos = ParamPositions[FD];
11048 } else {
11049 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11050 ->getCanonicalDecl();
11051 auto It = ParamPositions.find(PVD);
11052 assert(It != ParamPositions.end() && "Function parameter not found");
11053 Pos = It->second;
11054 }
11055 ParamAttrs[Pos].Kind = Uniform;
11056 }
11057 // Get alignment info.
11058 auto *NI = Attr->alignments_begin();
11059 for (const Expr *E : Attr->aligneds()) {
11060 E = E->IgnoreParenImpCasts();
11061 unsigned Pos;
11062 QualType ParmTy;
11063 if (isa<CXXThisExpr>(E)) {
11064 Pos = ParamPositions[FD];
11065 ParmTy = E->getType();
11066 } else {
11067 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11068 ->getCanonicalDecl();
11069 auto It = ParamPositions.find(PVD);
11070 assert(It != ParamPositions.end() && "Function parameter not found");
11071 Pos = It->second;
11072 ParmTy = PVD->getType();
11073 }
11074 ParamAttrs[Pos].Alignment =
11075 (*NI)
11076 ? (*NI)->EvaluateKnownConstInt(C)
11077 : llvm::APSInt::getUnsigned(
11078 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11079 .getQuantity());
11080 ++NI;
11081 }
11082 // Mark linear parameters.
11083 auto *SI = Attr->steps_begin();
11084 auto *MI = Attr->modifiers_begin();
11085 for (const Expr *E : Attr->linears()) {
11086 E = E->IgnoreParenImpCasts();
11087 unsigned Pos;
11088 bool IsReferenceType = false;
11089 // Rescaling factor needed to compute the linear parameter
11090 // value in the mangled name.
11091 unsigned PtrRescalingFactor = 1;
11092 if (isa<CXXThisExpr>(E)) {
11093 Pos = ParamPositions[FD];
11094 auto *P = cast<PointerType>(E->getType());
11095 PtrRescalingFactor = CGM.getContext()
11096 .getTypeSizeInChars(P->getPointeeType())
11097 .getQuantity();
11098 } else {
11099 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11100 ->getCanonicalDecl();
11101 auto It = ParamPositions.find(PVD);
11102 assert(It != ParamPositions.end() && "Function parameter not found");
11103 Pos = It->second;
11104 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11105 PtrRescalingFactor = CGM.getContext()
11106 .getTypeSizeInChars(P->getPointeeType())
11107 .getQuantity();
11108 else if (PVD->getType()->isReferenceType()) {
11109 IsReferenceType = true;
11110 PtrRescalingFactor =
11111 CGM.getContext()
11112 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11113 .getQuantity();
11114 }
11115 }
11116 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11117 if (*MI == OMPC_LINEAR_ref)
11118 ParamAttr.Kind = LinearRef;
11119 else if (*MI == OMPC_LINEAR_uval)
11120 ParamAttr.Kind = LinearUVal;
11121 else if (IsReferenceType)
11122 ParamAttr.Kind = LinearVal;
11123 else
11124 ParamAttr.Kind = Linear;
11125 // Assuming a stride of 1, for `linear` without modifiers.
11126 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11127 if (*SI) {
11128 Expr::EvalResult Result;
11129 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11130 if (const auto *DRE =
11131 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11132 if (const auto *StridePVD =
11133 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11134 ParamAttr.HasVarStride = true;
11135 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11136 assert(It != ParamPositions.end() &&
11137 "Function parameter not found");
11138 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11139 }
11140 }
11141 } else {
11142 ParamAttr.StrideOrArg = Result.Val.getInt();
11143 }
11144 }
11145 // If we are using a linear clause on a pointer, we need to
11146 // rescale the value of linear_step with the byte size of the
11147 // pointee type.
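          // For example, 'linear(p:1)' on 'double *p' mangles with a step of
          // 1 * sizeof(double) == 8, i.e. as "l8".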
11148 if (!ParamAttr.HasVarStride &&
11149 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11150 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11151 ++SI;
11152 ++MI;
11153 }
11154 llvm::APSInt VLENVal;
11155 SourceLocation ExprLoc;
11156 const Expr *VLENExpr = Attr->getSimdlen();
11157 if (VLENExpr) {
11158 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11159 ExprLoc = VLENExpr->getExprLoc();
11160 }
11161 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11162 if (CGM.getTriple().isX86()) {
11163 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11164 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11165 unsigned VLEN = VLENVal.getExtValue();
11166 StringRef MangledName = Fn->getName();
11167 if (CGM.getTarget().hasFeature("sve"))
11168 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11169 MangledName, 's', 128, Fn, ExprLoc);
11170 else if (CGM.getTarget().hasFeature("neon"))
11171 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11172 MangledName, 'n', 128, Fn, ExprLoc);
11173 }
11174 }
11175 FD = FD->getPreviousDecl();
11176 }
11177 }
11178
11179 namespace {
11180 /// Cleanup action for doacross support.
11181 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11182 public:
11183 static const int DoacrossFinArgs = 2;
11184
11185 private:
11186 llvm::FunctionCallee RTLFn;
11187 llvm::Value *Args[DoacrossFinArgs];
11188
11189 public:
11190   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11191 ArrayRef<llvm::Value *> CallArgs)
11192 : RTLFn(RTLFn) {
11193 assert(CallArgs.size() == DoacrossFinArgs);
11194 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11195 }
11196   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11197 if (!CGF.HaveInsertPoint())
11198 return;
11199 CGF.EmitRuntimeCall(RTLFn, Args);
11200 }
11201 };
11202 } // namespace
11203
11204 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11205 const OMPLoopDirective &D,
11206 ArrayRef<Expr *> NumIterations) {
11207 if (!CGF.HaveInsertPoint())
11208 return;
11209
11210 ASTContext &C = CGM.getContext();
11211 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11212 RecordDecl *RD;
11213 if (KmpDimTy.isNull()) {
11214 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11215 // kmp_int64 lo; // lower
11216 // kmp_int64 up; // upper
11217 // kmp_int64 st; // stride
11218 // };
11219 RD = C.buildImplicitRecord("kmp_dim");
11220 RD->startDefinition();
11221 addFieldToRecordDecl(C, RD, Int64Ty);
11222 addFieldToRecordDecl(C, RD, Int64Ty);
11223 addFieldToRecordDecl(C, RD, Int64Ty);
11224 RD->completeDefinition();
11225 KmpDimTy = C.getRecordType(RD);
11226 } else {
11227 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11228 }
11229 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11230 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11231 ArraySizeModifier::Normal, 0);
11232
11233 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11234 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11235 enum { LowerFD = 0, UpperFD, StrideFD };
11236 // Fill dims with data.
11237 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11238 LValue DimsLVal = CGF.MakeAddrLValue(
11239 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11240 // dims.upper = num_iterations;
11241 LValue UpperLVal = CGF.EmitLValueForField(
11242 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11243 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11244 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11245 Int64Ty, NumIterations[I]->getExprLoc());
11246 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11247 // dims.stride = 1;
11248 LValue StrideLVal = CGF.EmitLValueForField(
11249 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11250 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11251 StrideLVal);
11252 }
11253
11254 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11255 // kmp_int32 num_dims, struct kmp_dim * dims);
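  // For example, '#pragma omp for ordered(2)' fills a 2-element 'dims' array
  // and passes num_dims == 2.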
11256 llvm::Value *Args[] = {
11257 emitUpdateLocation(CGF, D.getBeginLoc()),
11258 getThreadID(CGF, D.getBeginLoc()),
11259 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11260 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11261 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11262 CGM.VoidPtrTy)};
11263
11264 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11265 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11266 CGF.EmitRuntimeCall(RTLFn, Args);
11267 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11268 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11269 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11270 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11271 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11272 llvm::ArrayRef(FiniArgs));
11273 }
11274
11275 template <typename T>
11276 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11277 const T *C, llvm::Value *ULoc,
11278 llvm::Value *ThreadID) {
11279 QualType Int64Ty =
11280 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11281 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11282 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11283 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11284 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11285 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11286 const Expr *CounterVal = C->getLoopData(I);
11287 assert(CounterVal);
11288 llvm::Value *CntVal = CGF.EmitScalarConversion(
11289 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11290 CounterVal->getExprLoc());
11291 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11292 /*Volatile=*/false, Int64Ty);
11293 }
11294 llvm::Value *Args[] = {
11295 ULoc, ThreadID,
11296 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11297 llvm::FunctionCallee RTLFn;
11298 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11299 OMPDoacrossKind<T> ODK;
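  // A 'source' dependency posts the current iteration (__kmpc_doacross_post);
  // a 'sink' dependency waits on prior iterations (__kmpc_doacross_wait).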
11300 if (ODK.isSource(C)) {
11301 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11302 OMPRTL___kmpc_doacross_post);
11303 } else {
11304 assert(ODK.isSink(C) && "Expect sink modifier.");
11305 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11306 OMPRTL___kmpc_doacross_wait);
11307 }
11308 CGF.EmitRuntimeCall(RTLFn, Args);
11309 }
11310
11311 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11312 const OMPDependClause *C) {
11313 return EmitDoacrossOrdered<OMPDependClause>(
11314 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11315 getThreadID(CGF, C->getBeginLoc()));
11316 }
11317
11318 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11319 const OMPDoacrossClause *C) {
11320 return EmitDoacrossOrdered<OMPDoacrossClause>(
11321 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11322 getThreadID(CGF, C->getBeginLoc()));
11323 }
11324
11325 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11326 llvm::FunctionCallee Callee,
11327 ArrayRef<llvm::Value *> Args) const {
11328 assert(Loc.isValid() && "Outlined function call location must be valid.");
11329 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11330
11331 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11332 if (Fn->doesNotThrow()) {
11333 CGF.EmitNounwindRuntimeCall(Fn, Args);
11334 return;
11335 }
11336 }
11337 CGF.EmitRuntimeCall(Callee, Args);
11338 }
11339
11340 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11341 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11342 ArrayRef<llvm::Value *> Args) const {
11343 emitCall(CGF, Loc, OutlinedFn, Args);
11344 }
11345
11346 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11347 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11348 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11349 HasEmittedDeclareTargetRegion = true;
11350 }
11351
11352 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11353 const VarDecl *NativeParam,
11354 const VarDecl *TargetParam) const {
11355 return CGF.GetAddrOfLocalVar(NativeParam);
11356 }
11357
11358 /// Return the allocator value from an expression, or a null allocator (the
11359 /// default when no allocator is specified).
11360 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11361 const Expr *Allocator) {
11362 llvm::Value *AllocVal;
11363 if (Allocator) {
11364 AllocVal = CGF.EmitScalarExpr(Allocator);
11365 // According to the standard, the original allocator type is an enum
11366 // (integer). Convert it to a pointer type, if required.
11367 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11368 CGF.getContext().VoidPtrTy,
11369 Allocator->getExprLoc());
11370 } else {
11371 // If no allocator specified, it defaults to the null allocator.
11372 AllocVal = llvm::Constant::getNullValue(
11373 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11374 }
11375 return AllocVal;
11376 }
11377
11378 /// Return the alignment from an allocate directive if present.
11379 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11380 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11381
11382 if (!AllocateAlignment)
11383 return nullptr;
11384
11385 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11386 }
11387
11388 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11389 const VarDecl *VD) {
11390 if (!VD)
11391 return Address::invalid();
11392 Address UntiedAddr = Address::invalid();
11393 Address UntiedRealAddr = Address::invalid();
11394 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11395 if (It != FunctionToUntiedTaskStackMap.end()) {
11396 const UntiedLocalVarsAddressesMap &UntiedData =
11397 UntiedLocalVarsStack[It->second];
11398 auto I = UntiedData.find(VD);
11399 if (I != UntiedData.end()) {
11400 UntiedAddr = I->second.first;
11401 UntiedRealAddr = I->second.second;
11402 }
11403 }
11404 const VarDecl *CVD = VD->getCanonicalDecl();
11405 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11406 // Use the default allocation.
11407 if (!isAllocatableDecl(VD))
11408 return UntiedAddr;
11409 llvm::Value *Size;
11410 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11411 if (CVD->getType()->isVariablyModifiedType()) {
11412 Size = CGF.getTypeSize(CVD->getType());
11413 // Align the size: ((size + align - 1) / align) * align
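      // For example, size = 10 and align = 8 gives ((10 + 7) / 8) * 8 == 16.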
11414 Size = CGF.Builder.CreateNUWAdd(
11415 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11416 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11417 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11418 } else {
11419 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11420 Size = CGM.getSize(Sz.alignTo(Align));
11421 }
11422 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11423 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11424 const Expr *Allocator = AA->getAllocator();
11425 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11426 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11427 SmallVector<llvm::Value *, 4> Args;
11428 Args.push_back(ThreadID);
11429 if (Alignment)
11430 Args.push_back(Alignment);
11431 Args.push_back(Size);
11432 Args.push_back(AllocVal);
11433 llvm::omp::RuntimeFunction FnID =
11434 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11435 llvm::Value *Addr = CGF.EmitRuntimeCall(
11436 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11437 getName({CVD->getName(), ".void.addr"}));
11438 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11439 CGM.getModule(), OMPRTL___kmpc_free);
11440 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11441 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11442 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11443 if (UntiedAddr.isValid())
11444 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11445
11446 // Cleanup action for allocate support.
11447 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11448 llvm::FunctionCallee RTLFn;
11449 SourceLocation::UIntTy LocEncoding;
11450 Address Addr;
11451 const Expr *AllocExpr;
11452
11453 public:
11454 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11455 SourceLocation::UIntTy LocEncoding, Address Addr,
11456 const Expr *AllocExpr)
11457 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11458 AllocExpr(AllocExpr) {}
11459 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11460 if (!CGF.HaveInsertPoint())
11461 return;
11462 llvm::Value *Args[3];
11463 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11464 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11465 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11466 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11467 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11468 Args[2] = AllocVal;
11469 CGF.EmitRuntimeCall(RTLFn, Args);
11470 }
11471 };
11472 Address VDAddr =
11473 UntiedRealAddr.isValid()
11474 ? UntiedRealAddr
11475 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11476 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11477 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11478 VDAddr, Allocator);
11479 if (UntiedRealAddr.isValid())
11480 if (auto *Region =
11481 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11482 Region->emitUntiedSwitch(CGF);
11483 return VDAddr;
11484 }
11485 return UntiedAddr;
11486 }
11487
11488 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11489 const VarDecl *VD) const {
11490 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11491 if (It == FunctionToUntiedTaskStackMap.end())
11492 return false;
11493 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11494 }
11495
11496 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11497 CodeGenModule &CGM, const OMPLoopDirective &S)
11498 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11499 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11500 if (!NeedToPush)
11501 return;
11502 NontemporalDeclsSet &DS =
11503 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11504 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11505 for (const Stmt *Ref : C->private_refs()) {
11506 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11507 const ValueDecl *VD;
11508 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11509 VD = DRE->getDecl();
11510 } else {
11511 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11512 assert((ME->isImplicitCXXThis() ||
11513 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11514 "Expected member of current class.");
11515 VD = ME->getMemberDecl();
11516 }
11517 DS.insert(VD);
11518 }
11519 }
11520 }
11521
11522 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11523 if (!NeedToPush)
11524 return;
11525 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11526 }
11527
11528 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11529 CodeGenFunction &CGF,
11530 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11531 std::pair<Address, Address>> &LocalVars)
11532 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11533 if (!NeedToPush)
11534 return;
11535 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11536 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11537 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11538 }
11539
11540 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11541 if (!NeedToPush)
11542 return;
11543 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11544 }
11545
11546 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11547 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11548
11549 return llvm::any_of(
11550 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11551 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11552 }
11553
11554 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11555 const OMPExecutableDirective &S,
11556 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11557 const {
11558 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11559 // Vars in target/task regions must be excluded completely.
11560 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11561 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11562 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11563 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11564 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11565 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11566 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11567 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11568 }
11569 }
11570 // Exclude vars in private clauses.
11571 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11572 for (const Expr *Ref : C->varlist()) {
11573 if (!Ref->getType()->isScalarType())
11574 continue;
11575 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11576 if (!DRE)
11577 continue;
11578 NeedToCheckForLPCs.insert(DRE->getDecl());
11579 }
11580 }
11581 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11582 for (const Expr *Ref : C->varlist()) {
11583 if (!Ref->getType()->isScalarType())
11584 continue;
11585 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11586 if (!DRE)
11587 continue;
11588 NeedToCheckForLPCs.insert(DRE->getDecl());
11589 }
11590 }
11591 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11592 for (const Expr *Ref : C->varlist()) {
11593 if (!Ref->getType()->isScalarType())
11594 continue;
11595 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11596 if (!DRE)
11597 continue;
11598 NeedToCheckForLPCs.insert(DRE->getDecl());
11599 }
11600 }
11601 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11602 for (const Expr *Ref : C->varlist()) {
11603 if (!Ref->getType()->isScalarType())
11604 continue;
11605 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11606 if (!DRE)
11607 continue;
11608 NeedToCheckForLPCs.insert(DRE->getDecl());
11609 }
11610 }
11611 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11612 for (const Expr *Ref : C->varlist()) {
11613 if (!Ref->getType()->isScalarType())
11614 continue;
11615 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11616 if (!DRE)
11617 continue;
11618 NeedToCheckForLPCs.insert(DRE->getDecl());
11619 }
11620 }
11621 for (const Decl *VD : NeedToCheckForLPCs) {
11622 for (const LastprivateConditionalData &Data :
11623 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11624 if (Data.DeclToUniqueName.count(VD) > 0) {
11625 if (!Data.Disabled)
11626 NeedToAddForLPCsAsDisabled.insert(VD);
11627 break;
11628 }
11629 }
11630 }
11631 }
11632
11633 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11634 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11635 : CGM(CGF.CGM),
11636 Action((CGM.getLangOpts().OpenMP >= 50 &&
11637 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11638 [](const OMPLastprivateClause *C) {
11639 return C->getKind() ==
11640 OMPC_LASTPRIVATE_conditional;
11641 }))
11642 ? ActionToDo::PushAsLastprivateConditional
11643 : ActionToDo::DoNotPush) {
11644 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11645 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11646 return;
11647 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11648 "Expected a push action.");
11649 LastprivateConditionalData &Data =
11650 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11651 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11652 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11653 continue;
11654
11655 for (const Expr *Ref : C->varlist()) {
11656 Data.DeclToUniqueName.insert(std::make_pair(
11657 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11658 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11659 }
11660 }
11661 Data.IVLVal = IVLVal;
11662 Data.Fn = CGF.CurFn;
11663 }
11664
11665 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11666 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11667 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11668 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11669 if (CGM.getLangOpts().OpenMP < 50)
11670 return;
11671 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11672 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11673 if (!NeedToAddForLPCsAsDisabled.empty()) {
11674 Action = ActionToDo::DisableLastprivateConditional;
11675 LastprivateConditionalData &Data =
11676 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11677 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11678 Data.DeclToUniqueName.try_emplace(VD);
11679 Data.Fn = CGF.CurFn;
11680 Data.Disabled = true;
11681 }
11682 }
11683
11684 CGOpenMPRuntime::LastprivateConditionalRAII
11685 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11686 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11687 return LastprivateConditionalRAII(CGF, S);
11688 }
11689
11690 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11691 if (CGM.getLangOpts().OpenMP < 50)
11692 return;
11693 if (Action == ActionToDo::DisableLastprivateConditional) {
11694 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11695 "Expected list of disabled private vars.");
11696 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11697 }
11698 if (Action == ActionToDo::PushAsLastprivateConditional) {
11699 assert(
11700 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11701 "Expected list of lastprivate conditional vars.");
11702 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11703 }
11704 }
11705
11706 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11707 const VarDecl *VD) {
11708 ASTContext &C = CGM.getContext();
11709 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11710 QualType NewType;
11711 const FieldDecl *VDField;
11712 const FieldDecl *FiredField;
11713 LValue BaseLVal;
11714 auto VI = I->getSecond().find(VD);
11715 if (VI == I->getSecond().end()) {
11716 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11717 RD->startDefinition();
11718 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11719 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11720 RD->completeDefinition();
11721 NewType = C.getRecordType(RD);
11722 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11723 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11724 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11725 } else {
11726 NewType = std::get<0>(VI->getSecond());
11727 VDField = std::get<1>(VI->getSecond());
11728 FiredField = std::get<2>(VI->getSecond());
11729 BaseLVal = std::get<3>(VI->getSecond());
11730 }
11731 LValue FiredLVal =
11732 CGF.EmitLValueForField(BaseLVal, FiredField);
11733 CGF.EmitStoreOfScalar(
11734 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11735 FiredLVal);
11736 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11737 }
11738
11739 namespace {
11740 /// Checks if the lastprivate conditional variable is referenced in LHS.
11741 class LastprivateConditionalRefChecker final
11742 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11743 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11744 const Expr *FoundE = nullptr;
11745 const Decl *FoundD = nullptr;
11746 StringRef UniqueDeclName;
11747 LValue IVLVal;
11748 llvm::Function *FoundFn = nullptr;
11749 SourceLocation Loc;
11750
11751 public:
11752   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11753 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11754 llvm::reverse(LPM)) {
11755 auto It = D.DeclToUniqueName.find(E->getDecl());
11756 if (It == D.DeclToUniqueName.end())
11757 continue;
11758 if (D.Disabled)
11759 return false;
11760 FoundE = E;
11761 FoundD = E->getDecl()->getCanonicalDecl();
11762 UniqueDeclName = It->second;
11763 IVLVal = D.IVLVal;
11764 FoundFn = D.Fn;
11765 break;
11766 }
11767 return FoundE == E;
11768 }
11769   bool VisitMemberExpr(const MemberExpr *E) {
11770 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11771 return false;
11772 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11773 llvm::reverse(LPM)) {
11774 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11775 if (It == D.DeclToUniqueName.end())
11776 continue;
11777 if (D.Disabled)
11778 return false;
11779 FoundE = E;
11780 FoundD = E->getMemberDecl()->getCanonicalDecl();
11781 UniqueDeclName = It->second;
11782 IVLVal = D.IVLVal;
11783 FoundFn = D.Fn;
11784 break;
11785 }
11786 return FoundE == E;
11787 }
11788   bool VisitStmt(const Stmt *S) {
11789 for (const Stmt *Child : S->children()) {
11790 if (!Child)
11791 continue;
11792 if (const auto *E = dyn_cast<Expr>(Child))
11793 if (!E->isGLValue())
11794 continue;
11795 if (Visit(Child))
11796 return true;
11797 }
11798 return false;
11799 }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11800 explicit LastprivateConditionalRefChecker(
11801 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11802 : LPM(LPM) {}
11803 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11804 getFoundData() const {
11805 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11806 }
11807 };
11808 } // namespace
11809
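// Emits the bookkeeping for one lastprivate conditional update. A sketch of
// the generated pattern ('last_iv' and 'last_a' are internal globals derived
// from UniqueDeclName; the names here are illustrative):
//
//   #pragma omp critical(<UniqueDeclName>)
//   if (last_iv <= iv) {
//     last_iv = iv;
//     last_a = priv_a;
//   }
//
// Under -fopenmp-simd the critical section is omitted, since no parallel
// region can be emitted in that mode.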
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // Check whether the loop counter advanced (last_iv <= iv) and, if so,
    // store the new value in the global variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // No parallel region can be emitted in SIMD-only mode, so a critical
    // region is not needed.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

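// Invoked with the LHS of updates during statement codegen; a sketch of
// source that reaches this hook (assuming OpenMP 5.0 semantics):
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (cond(i))
//       a = i; // the DeclRefExpr to 'a' in this LHS is what gets matched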
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

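// After an inner region completes, variables whose 'Fired' flag was set there
// still need the global 'last value' update in the enclosing function; the
// emitted check is roughly:
//
//   if (priv_a.Fired != 0)
//     <emit lastprivate conditional update for 'a'>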
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

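// Final copy-out at the end of the construct; roughly equivalent to:
//
//   if (<internal global for 'a' was ever created>) // compile-time check
//     a = last_a;
//
// If no update fired anywhere, the internal global was never emitted and the
// original value of the variable is left untouched.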
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

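// CGOpenMPSIMDRuntime is installed for -fopenmp-simd, where only 'simd'
// constructs are code-generated and no OpenMP runtime calls may be emitted.
// Every entry point below that would require the runtime is therefore a stub
// that must be unreachable; Sema is expected to reject or ignore the
// corresponding constructs in this mode.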
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

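// Target globals need no offload-entry registration in SIMD-only mode;
// returning false lets CodeGenModule emit the global through the normal,
// non-offloading path.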
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
