//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
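  /// Action that emits the part-id dispatch needed to resume an untied task.
  /// A sketch of the control flow this class emits (illustrative, not the
  /// literal IR):
  ///
  ///   switch (*part_id) {
  ///   case 0: goto .untied.jmp.0;  // initial entry
  ///   case 1: goto .untied.jmp.1;  // resume after the first switching point
  ///   ...
  ///   default: goto .untied.done.; // finished
  ///   }
  ///
  /// Enter() below creates the switch; each emitUntiedSwitch() call stores
  /// the next part id and adds one more case to it.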
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
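
// These flag bits are OR'ed into the ident_t flags field when a location is
// emitted; e.g. the implicit barrier ending a worksharing 'for' loop would
// typically combine OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR (a sketch of
// a common combination, not an exhaustive rule).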

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
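
// The modifier bits are OR'ed onto the base schedule value; e.g. a loop with
// 'schedule(nonmonotonic: dynamic)' would be encoded roughly as
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic (a sketch of the
// encoding, not normative).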

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
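///
/// A sketch of the emitted control flow (illustrative, not the literal IR):
///
///   if (dest.begin == dest.end) goto omp.arrayinit.done;
/// omp.arrayinit.body:
///   <initialize current element>; advance dest (and src for UDR inits);
///   if (not at dest.end) goto omp.arrayinit.body;
/// omp.arrayinit.done: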
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

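/// Walk the pointer/reference levels of \p BaseTy, loading through each one,
/// until the element type \p ElTy is reached, and return an lvalue for the
/// innermost address with its element type adjusted to \p ElTy.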
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

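/// Roughly the inverse of loadToBegin: cast the adjusted private pointer
/// \p Addr back to the shape of the original base type \p BaseTy,
/// materializing a chain of pointer temporaries when the base is reached
/// through pointers or references.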
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

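// As an illustrative sketch (not the exact emitted code), for
//   #pragma omp declare reduction(merge : T : omp_out += omp_in)
// the combiner helper emitted below behaves roughly like:
//   static void .omp_combiner.(T *omp_out, T *omp_in) { *omp_out += *omp_in; }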
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}
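
// For a function 'foo' the helper name produced above is typically
// "foo.omp_outlined" (the separators come from the platform-specific
// configuration, so this is an illustrative example rather than a guarantee).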
1253
getOutlinedHelperName(CodeGenFunction & CGF) const1254 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1255 return getOutlinedHelperName(CGF.CurFn->getName());
1256 }
1257
getReductionFuncName(StringRef Name) const1258 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1259 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1260 return (Name + Suffix).str();
1261 }

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

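// The "service insert point" below is a placeholder no-op instruction (a
// bitcast of undef named "svcpt") that marks where runtime service calls
// such as __kmpc_global_thread_num should be emitted for the current
// function. It is created either at the current insertion point or right
// after the alloca insertion point, and is erased again in
// clearLocThreadIdInsertPt().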
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
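
// For example, a location in function 'foo' at line 3, column 7 of "t.c"
// produces the ident string ";t.c;foo;3;7;;", the same layout the libomp
// runtime stores in the psource field of an ident_t.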

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as
      // an argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
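
// Illustrative lowering (approximate): accessing a threadprivate variable
// 'g' when TLS cannot be used becomes a call such as
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, (void *)&g,
//                                         sizeof(g), &g.cache);
// where 'g.cache' stands for the internal cache variable created by
// getOrCreateThreadPrivateCache(); the exact mangled name is
// target-dependent.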

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
  // runtime library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // requires it to always be NULL and asserts otherwise.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
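
// For illustration: given "S g;" with "#pragma omp threadprivate(g)", where
// S has a non-trivial constructor and destructor, the code above emits
// helpers roughly equivalent to
//   void *ctor(void *p) { new (p) S(); return p; }
//   void dtor(void *p) { ((S *)p)->~S(); }
//   void init() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &g, ctor, /*cctor=*/0, dtor);
//   }
// Names are illustrative; the actual symbols come from getName() (e.g.
// "__kmpc_global_ctor_." on most targets).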

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime
  // to access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
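
// For illustration: with a foldable condition such as if(1), only ThenGen is
// emitted and no branch is produced. Otherwise the emitted CFG looks like
//   br %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then: <ThenGen> ; br label %omp_if.end
//   omp_if.else: <ElseGen> ; br label %omp_if.end
//   omp_if.end: ...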

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
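
// For illustration: "#pragma omp parallel if(c)" is lowered roughly as
//   if (c) {
//     __kmpc_fork_call(&loc, n, (kmpc_micro)omp_outlined, <captured vars>);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     omp_outlined(&gtid, &zero_bound, <captured vars>);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }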

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in
// a temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
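
// For illustration: "#pragma omp critical(lck) hint(h)" is lowered roughly as
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_lck.var, h);
//   <body>;
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_lck.var);
// The lock variable name is illustrative; the exact spelling comes from
// getCriticalRegionLock() and the target's separator.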

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
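
// For illustration: "#pragma omp single copyprivate(x)" is lowered roughly as
//   int did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <body>;
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *list[1] = {&x};
//   __kmpc_copyprivate(&loc, gtid, sizeof(list), list, copy_func, did_it);
// where copy_func is the helper built by emitCopyprivateCopyFunction().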

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // that case, choose the static schedule with chunk size 1.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
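
// For illustration: schedule(dynamic, 4) maps to OMP_sch_dynamic_chunked,
// schedule(static) to OMP_sch_static, and schedule(static, 4) on a loop with
// an ordered clause to OMP_ord_static_chunked.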

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
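
// For illustration: under OpenMP 5.0, schedule(dynamic) with no modifier
// yields OMP_sch_dynamic_chunked with the nonmonotonic modifier bit OR'ed
// in, while schedule(static) is left unmodified (monotonic by default).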

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2557
emitForDispatchDeinit(CodeGenFunction & CGF,SourceLocation Loc)2558 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2559 SourceLocation Loc) {
2560 if (!CGF.HaveInsertPoint())
2561 return;
2562 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2563 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2564 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2565 }
2566
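// Unlike the dispatch-based schedules above, a static schedule is computed up
// front: a single __kmpc_for_static_init_* call fills in the calling thread's
// lower bound, upper bound, stride, and last-iteration flag.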
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

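// For 'distribute' on AMDGCN/NVPTX device compilations, the dedicated
// __kmpc_distribute_static_init_* entry points are used instead of the
// generic worksharing initializer.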
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

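// Conceptually, the caller drives a dynamically scheduled loop as:
//   while (__kmpc_dispatch_next(..., &LastIter, &LB, &UB, &ST)) {
//     for (IV = LB; IV <= UB; IV += ST)
//       <loop body>;
//   }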
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

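// e.g. '#pragma omp parallel num_threads(N)' evaluates N and pushes it with
// __kmpc_push_num_threads before the runtime forks the parallel region.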
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

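// Note: the requested atomic ordering is not forwarded to the runtime here;
// __kmpc_flush acts as a full memory fence for the encountering thread.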
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

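// Emits the module-level offload entry metadata describing all registered
// target regions and declare-target variables; malformed entries are reported
// through the diagnostic callback defined below.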
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

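// The privates record packs the task's private/firstprivate/lastprivate
// copies; the caller (emitTaskInit) sorts them by decreasing alignment first,
// so the layout wastes no padding.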
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

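/// Emit a function that runs the destructors of the non-trivially
/// destructible fields of the task's privates record. When cleanups are
/// required it is stored in the task's 'data1' (destructors) slot.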
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if the task duplication function for taskloops needs to emit
/// initializations, i.e. whether any private copy has a non-trivial
/// constructor-based initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

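// Iterator modifiers, e.g. 'depend(iterator(i = 0:n), in : a[i])', expand
// into a loop nest at the point of use; the RAII scope below emits that nest
// and privatizes the iterator variables.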
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

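// Builds the kmp_task_t_with_privates record and its helper functions for a
// task-based directive, then emits the __kmpc_omp_task_alloc call (or
// __kmpc_omp_target_task_alloc for 'nowait' target tasks) and fills in the
// task data, including detach and affinity clause handling.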
3613 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)3614 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3615 const OMPExecutableDirective &D,
3616 llvm::Function *TaskFunction, QualType SharedsTy,
3617 Address Shareds, const OMPTaskDataTy &Data) {
3618 ASTContext &C = CGM.getContext();
3619 llvm::SmallVector<PrivateDataTy, 4> Privates;
3620 // Aggregate privates and sort them by the alignment.
3621 const auto *I = Data.PrivateCopies.begin();
3622 for (const Expr *E : Data.PrivateVars) {
3623 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624 Privates.emplace_back(
3625 C.getDeclAlign(VD),
3626 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 /*PrivateElemInit=*/nullptr));
3628 ++I;
3629 }
3630 I = Data.FirstprivateCopies.begin();
3631 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3632 for (const Expr *E : Data.FirstprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(
3637 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3638 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3639 ++I;
3640 ++IElemInitRef;
3641 }
3642 I = Data.LastprivateCopies.begin();
3643 for (const Expr *E : Data.LastprivateVars) {
3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645 Privates.emplace_back(
3646 C.getDeclAlign(VD),
3647 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3648 /*PrivateElemInit=*/nullptr));
3649 ++I;
3650 }
3651 for (const VarDecl *VD : Data.PrivateLocals) {
3652 if (isAllocatableDecl(VD))
3653 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3654 else
3655 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3656 }
3657 llvm::stable_sort(Privates,
3658 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3659 return L.first > R.first;
3660 });
3661 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3662 // Build type kmp_routine_entry_t (if not built yet).
3663 emitKmpRoutineEntryT(KmpInt32Ty);
3664 // Build type kmp_task_t (if not built yet).
3665 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3666 if (SavedKmpTaskloopTQTy.isNull()) {
3667 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3668 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3669 }
3670 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3671 } else {
3672 assert((D.getDirectiveKind() == OMPD_task ||
3673 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3674 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3675 "Expected taskloop, task or target directive");
3676 if (SavedKmpTaskTQTy.isNull()) {
3677 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3678 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3679 }
3680 KmpTaskTQTy = SavedKmpTaskTQTy;
3681 }
3682 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3683 // Build particular struct kmp_task_t for the given task.
3684 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3685 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3686 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3687 QualType KmpTaskTWithPrivatesPtrQTy =
3688 C.getPointerType(KmpTaskTWithPrivatesQTy);
3689 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3690 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3691 KmpTaskTWithPrivatesTy->getPointerTo();
3692 llvm::Value *KmpTaskTWithPrivatesTySize =
3693 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3694 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3695
3696 // Emit initial values for private copies (if any).
3697 llvm::Value *TaskPrivatesMap = nullptr;
3698 llvm::Type *TaskPrivatesMapTy =
3699 std::next(TaskFunction->arg_begin(), 3)->getType();
3700 if (!Privates.empty()) {
3701 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3702 TaskPrivatesMap =
3703 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3704 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3705 TaskPrivatesMap, TaskPrivatesMapTy);
3706 } else {
3707 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3708 cast<llvm::PointerType>(TaskPrivatesMapTy));
3709 }
3710 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3711 // kmp_task_t *tt);
3712 llvm::Function *TaskEntry = emitProxyTaskFunction(
3713 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3714 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3715 TaskPrivatesMap);
3716
3717 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3718 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3719 // kmp_routine_entry_t *task_entry);
3720 // Task flags. Format is taken from
3721 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3722 // description of kmp_tasking_flags struct.
3723 enum {
3724 TiedFlag = 0x1,
3725 FinalFlag = 0x2,
3726 DestructorsFlag = 0x8,
3727 PriorityFlag = 0x20,
3728 DetachableFlag = 0x40,
3729 };
3730 unsigned Flags = Data.Tied ? TiedFlag : 0;
3731 bool NeedsCleanup = false;
3732 if (!Privates.empty()) {
3733 NeedsCleanup =
3734 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3735 if (NeedsCleanup)
3736 Flags = Flags | DestructorsFlag;
3737 }
3738 if (Data.Priority.getInt())
3739 Flags = Flags | PriorityFlag;
3740 if (D.hasClausesOfKind<OMPDetachClause>())
3741 Flags = Flags | DetachableFlag;
3742 llvm::Value *TaskFlags =
3743 Data.Final.getPointer()
3744 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3745 CGF.Builder.getInt32(FinalFlag),
3746 CGF.Builder.getInt32(/*C=*/0))
3747 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3748 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3749 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3750 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3751 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3752 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3753 TaskEntry, KmpRoutineEntryPtrTy)};
3754 llvm::Value *NewTask;
3755 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3756 // Check if we have any device clause associated with the directive.
3757 const Expr *Device = nullptr;
3758 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3759 Device = C->getDevice();
3760 // Emit device ID if any otherwise use default value.
3761 llvm::Value *DeviceID;
3762 if (Device)
3763 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3764 CGF.Int64Ty, /*isSigned=*/true);
3765 else
3766 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3767 AllocArgs.push_back(DeviceID);
3768 NewTask = CGF.EmitRuntimeCall(
3769 OMPBuilder.getOrCreateRuntimeFunction(
3770 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3771 AllocArgs);
3772 } else {
3773 NewTask =
3774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3775 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3776 AllocArgs);
3777 }
3778 // Emit detach clause initialization.
3779 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3780 // task_descriptor);
3781 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3782 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3783 LValue EvtLVal = CGF.EmitLValue(Evt);
3784
3785 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3786 // int gtid, kmp_task_t *task);
3787 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3788 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3789 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3790 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3791 OMPBuilder.getOrCreateRuntimeFunction(
3792 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3793 {Loc, Tid, NewTask});
3794 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3795 Evt->getExprLoc());
3796 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3797 }
3798 // Process affinity clauses.
3799 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3800 // Process list of affinity data.
3801 ASTContext &C = CGM.getContext();
3802 Address AffinitiesArray = Address::invalid();
3803 // Calculate number of elements to form the array of affinity data.
3804 llvm::Value *NumOfElements = nullptr;
3805 unsigned NumAffinities = 0;
3806 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3807 if (const Expr *Modifier = C->getModifier()) {
3808 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3809 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3810 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3811 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3812 NumOfElements =
3813 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3814 }
3815 } else {
3816 NumAffinities += C->varlist_size();
3817 }
3818 }
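// E.g. for 'affinity(iterator(i=0:N): a[i]) affinity(b, c)' the iterator
// clause contributes a runtime count of N while the plain clause contributes
// a compile-time count of 2; both totals are combined below.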
3819 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3820 // Field ids in the kmp_task_affinity_info record.
3821 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3822
3823 QualType KmpTaskAffinityInfoArrayTy;
3824 if (NumOfElements) {
3825 NumOfElements = CGF.Builder.CreateNUWAdd(
3826 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3827 auto *OVE = new (C) OpaqueValueExpr(
3828 Loc,
3829 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3830 VK_PRValue);
3831 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3832 RValue::get(NumOfElements));
3833 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3834 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3835 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3836 // Properly emit variable-sized array.
3837 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3838 ImplicitParamKind::Other);
3839 CGF.EmitVarDecl(*PD);
3840 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3841 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3842 /*isSigned=*/false);
3843 } else {
3844 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3845 KmpTaskAffinityInfoTy,
3846 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3847 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3848 AffinitiesArray =
3849 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3850 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3851 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3852 /*isSigned=*/false);
3853 }
3854
3855 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3856 // Fill the array with elements that have no iterator modifier.
3857 unsigned Pos = 0;
3858 bool HasIterator = false;
3859 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3860 if (C->getModifier()) {
3861 HasIterator = true;
3862 continue;
3863 }
3864 for (const Expr *E : C->varlists()) {
3865 llvm::Value *Addr;
3866 llvm::Value *Size;
3867 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3868 LValue Base =
3869 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3870 KmpTaskAffinityInfoTy);
3871 // affs[i].base_addr = &<Affinities[i].second>;
3872 LValue BaseAddrLVal = CGF.EmitLValueForField(
3873 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3874 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3875 BaseAddrLVal);
3876 // affs[i].len = sizeof(<Affinities[i].second>);
3877 LValue LenLVal = CGF.EmitLValueForField(
3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3879 CGF.EmitStoreOfScalar(Size, LenLVal);
3880 ++Pos;
3881 }
3882 }
3883 LValue PosLVal;
3884 if (HasIterator) {
3885 PosLVal = CGF.MakeAddrLValue(
3886 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3887 C.getSizeType());
3888 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3889 }
3890 // Process elements with iterators.
3891 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3892 const Expr *Modifier = C->getModifier();
3893 if (!Modifier)
3894 continue;
3895 OMPIteratorGeneratorScope IteratorScope(
3896 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3897 for (const Expr *E : C->varlists()) {
3898 llvm::Value *Addr;
3899 llvm::Value *Size;
3900 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3901 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3902 LValue Base =
3903 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3904 KmpTaskAffinityInfoTy);
3905 // affs[i].base_addr = &<Affinities[i].second>;
3906 LValue BaseAddrLVal = CGF.EmitLValueForField(
3907 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3908 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3909 BaseAddrLVal);
3910 // affs[i].len = sizeof(<Affinities[i].second>);
3911 LValue LenLVal = CGF.EmitLValueForField(
3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3913 CGF.EmitStoreOfScalar(Size, LenLVal);
3914 Idx = CGF.Builder.CreateNUWAdd(
3915 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3916 CGF.EmitStoreOfScalar(Idx, PosLVal);
3917 }
3918 }
3919 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3920 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3921 // naffins, kmp_task_affinity_info_t *affin_list);
3922 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3923 llvm::Value *GTid = getThreadID(CGF, Loc);
3924 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3925 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3926 // FIXME: Emit the function and ignore its result for now until the
3927 // runtime function is properly implemented.
3928 (void)CGF.EmitRuntimeCall(
3929 OMPBuilder.getOrCreateRuntimeFunction(
3930 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3931 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3932 }
3933 llvm::Value *NewTaskNewTaskTTy =
3934 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3935 NewTask, KmpTaskTWithPrivatesPtrTy);
3936 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3937 KmpTaskTWithPrivatesQTy);
3938 LValue TDBase =
3939 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3940 // Fill the data in the resulting kmp_task_t record.
3941 // Copy shareds if there are any.
3942 Address KmpTaskSharedsPtr = Address::invalid();
3943 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3944 KmpTaskSharedsPtr = Address(
3945 CGF.EmitLoadOfScalar(
3946 CGF.EmitLValueForField(
3947 TDBase,
3948 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3949 Loc),
3950 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3951 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3952 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3953 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3954 }
3955 // Emit initial values for private copies (if any).
3956 TaskResultTy Result;
3957 if (!Privates.empty()) {
3958 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3959 SharedsTy, SharedsPtrTy, Data, Privates,
3960 /*ForDup=*/false);
3961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3962 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3963 Result.TaskDupFn = emitTaskDupFunction(
3964 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3965 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3966 /*WithLastIter=*/!Data.LastprivateVars.empty());
3967 }
3968 }
3969 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3970 enum { Priority = 0, Destructors = 1 };
3971 // Provide pointer to function with destructors for privates.
3972 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3973 const RecordDecl *KmpCmplrdataUD =
3974 (*FI)->getType()->getAsUnionType()->getDecl();
3975 if (NeedsCleanup) {
3976 llvm::Value *DestructorFn = emitDestructorsFunction(
3977 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3978 KmpTaskTWithPrivatesQTy);
3979 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3980 LValue DestructorsLV = CGF.EmitLValueForField(
3981 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3982 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3983 DestructorFn, KmpRoutineEntryPtrTy),
3984 DestructorsLV);
3985 }
3986 // Set priority.
3987 if (Data.Priority.getInt()) {
3988 LValue Data2LV = CGF.EmitLValueForField(
3989 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3990 LValue PriorityLV = CGF.EmitLValueForField(
3991 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3992 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3993 }
3994 Result.NewTask = NewTask;
3995 Result.TaskEntry = TaskEntry;
3996 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3997 Result.TDBase = TDBase;
3998 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3999 return Result;
4000 }
4001
4002 /// Translates internal dependency kind into the runtime kind.
4003 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4004 RTLDependenceKindTy DepKind;
4005 switch (K) {
4006 case OMPC_DEPEND_in:
4007 DepKind = RTLDependenceKindTy::DepIn;
4008 break;
4009 // Out and InOut dependencies must use the same code.
4010 case OMPC_DEPEND_out:
4011 case OMPC_DEPEND_inout:
4012 DepKind = RTLDependenceKindTy::DepInOut;
4013 break;
4014 case OMPC_DEPEND_mutexinoutset:
4015 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4016 break;
4017 case OMPC_DEPEND_inoutset:
4018 DepKind = RTLDependenceKindTy::DepInOutSet;
4019 break;
4020 case OMPC_DEPEND_outallmemory:
4021 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4022 break;
4023 case OMPC_DEPEND_source:
4024 case OMPC_DEPEND_sink:
4025 case OMPC_DEPEND_depobj:
4026 case OMPC_DEPEND_inoutallmemory:
4027 case OMPC_DEPEND_unknown:
4028 llvm_unreachable("Unknown task dependence type");
4029 }
4030 return DepKind;
4031 }
4032
4033 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
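/// The record built here corresponds roughly to the runtime's
///   typedef struct kmp_depend_info {
///     kmp_intptr_t base_addr;
///     size_t       len;
///     kmp_uint8    flags; // modeled below as a bool-width unsigned integer
///   } kmp_depend_info_t;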
4034 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4035 QualType &FlagsTy) {
4036 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4037 if (KmpDependInfoTy.isNull()) {
4038 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4039 KmpDependInfoRD->startDefinition();
4040 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4041 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4042 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4043 KmpDependInfoRD->completeDefinition();
4044 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4045 }
4046 }
4047
4048 std::pair<llvm::Value *, LValue>
4049 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4050 SourceLocation Loc) {
4051 ASTContext &C = CGM.getContext();
4052 QualType FlagsTy;
4053 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4054 RecordDecl *KmpDependInfoRD =
4055 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4056 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4057 LValue Base = CGF.EmitLoadOfPointerLValue(
4058 DepobjLVal.getAddress().withElementType(
4059 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4060 KmpDependInfoPtrTy->castAs<PointerType>());
4061 Address DepObjAddr = CGF.Builder.CreateGEP(
4062 CGF, Base.getAddress(),
4063 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4064 LValue NumDepsBase = CGF.MakeAddrLValue(
4065 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4066 // NumDeps = deps[-1].base_addr; the slot preceding the list stores the count.
4067 LValue BaseAddrLVal = CGF.EmitLValueForField(
4068 NumDepsBase,
4069 *std::next(KmpDependInfoRD->field_begin(),
4070 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4071 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4072 return std::make_pair(NumDeps, Base);
4073 }
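// Note the deps[-1] access above: for a depobj, emitDepobjDependClause
// allocates one extra leading record whose base_addr field holds the number
// of elements and hands the program a pointer to the first real record, so
// stepping back one element recovers the count.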
4074
4075 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4076 llvm::PointerUnion<unsigned *, LValue *> Pos,
4077 const OMPTaskDataTy::DependData &Data,
4078 Address DependenciesArray) {
4079 CodeGenModule &CGM = CGF.CGM;
4080 ASTContext &C = CGM.getContext();
4081 QualType FlagsTy;
4082 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4083 RecordDecl *KmpDependInfoRD =
4084 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4085 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4086
4087 OMPIteratorGeneratorScope IteratorScope(
4088 CGF, cast_or_null<OMPIteratorExpr>(
4089 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4090 : nullptr));
4091 for (const Expr *E : Data.DepExprs) {
4092 llvm::Value *Addr;
4093 llvm::Value *Size;
4094
4095 // The expression will be a nullptr in the 'omp_all_memory' case.
4096 if (E) {
4097 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4098 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4099 } else {
4100 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4101 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4102 }
4103 LValue Base;
4104 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4105 Base = CGF.MakeAddrLValue(
4106 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4107 } else {
4108 assert(E && "Expected a non-null expression");
4109 LValue &PosLVal = *Pos.get<LValue *>();
4110 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4111 Base = CGF.MakeAddrLValue(
4112 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4113 }
4114 // deps[i].base_addr = &<Dependencies[i].second>;
4115 LValue BaseAddrLVal = CGF.EmitLValueForField(
4116 Base,
4117 *std::next(KmpDependInfoRD->field_begin(),
4118 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4119 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4120 // deps[i].len = sizeof(<Dependencies[i].second>);
4121 LValue LenLVal = CGF.EmitLValueForField(
4122 Base, *std::next(KmpDependInfoRD->field_begin(),
4123 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4124 CGF.EmitStoreOfScalar(Size, LenLVal);
4125 // deps[i].flags = <Dependencies[i].first>;
4126 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4127 LValue FlagsLVal = CGF.EmitLValueForField(
4128 Base,
4129 *std::next(KmpDependInfoRD->field_begin(),
4130 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4131 CGF.EmitStoreOfScalar(
4132 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4133 FlagsLVal);
4134 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4135 ++(*P);
4136 } else {
4137 LValue &PosLVal = *Pos.get<LValue *>();
4138 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4139 Idx = CGF.Builder.CreateNUWAdd(Idx,
4140 llvm::ConstantInt::get(Idx->getType(), 1));
4141 CGF.EmitStoreOfScalar(Idx, PosLVal);
4142 }
4143 }
4144 }
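// Sketch of what emitDependData produces for 'depend(inout: a, b)', per
// list item:
//   deps[pos].base_addr = (intptr_t)&a;
//   deps[pos].len       = sizeof(a);
//   deps[pos].flags     = DepInOut;
//   ++pos;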
4145
4146 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4147 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4148 const OMPTaskDataTy::DependData &Data) {
4149 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4150 "Expected depobj dependency kind.");
4151 SmallVector<llvm::Value *, 4> Sizes;
4152 SmallVector<LValue, 4> SizeLVals;
4153 ASTContext &C = CGF.getContext();
4154 {
4155 OMPIteratorGeneratorScope IteratorScope(
4156 CGF, cast_or_null<OMPIteratorExpr>(
4157 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4158 : nullptr));
4159 for (const Expr *E : Data.DepExprs) {
4160 llvm::Value *NumDeps;
4161 LValue Base;
4162 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4163 std::tie(NumDeps, Base) =
4164 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4165 LValue NumLVal = CGF.MakeAddrLValue(
4166 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4167 C.getUIntPtrType());
4168 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4169 NumLVal.getAddress());
4170 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4171 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4172 CGF.EmitStoreOfScalar(Add, NumLVal);
4173 SizeLVals.push_back(NumLVal);
4174 }
4175 }
4176 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4177 llvm::Value *Size =
4178 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4179 Sizes.push_back(Size);
4180 }
4181 return Sizes;
4182 }
4183
4184 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4185 QualType &KmpDependInfoTy,
4186 LValue PosLVal,
4187 const OMPTaskDataTy::DependData &Data,
4188 Address DependenciesArray) {
4189 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4190 "Expected depobj dependency kind.");
4191 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4192 {
4193 OMPIteratorGeneratorScope IteratorScope(
4194 CGF, cast_or_null<OMPIteratorExpr>(
4195 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4196 : nullptr));
4197 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4198 const Expr *E = Data.DepExprs[I];
4199 llvm::Value *NumDeps;
4200 LValue Base;
4201 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4202 std::tie(NumDeps, Base) =
4203 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4204
4205 // Memcpy the dependency data.
4206 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4207 ElSize,
4208 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4209 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4210 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4211 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4212
4213 // Advance pos by the number of copied elements:
4214 // pos += numDeps;
4215 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4216 CGF.EmitStoreOfScalar(Add, PosLVal);
4217 }
4218 }
4219 }
4220
4221 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4222 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4223 SourceLocation Loc) {
4224 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4225 return D.DepExprs.empty();
4226 }))
4227 return std::make_pair(nullptr, Address::invalid());
4228 // Process list of dependencies.
4229 ASTContext &C = CGM.getContext();
4230 Address DependenciesArray = Address::invalid();
4231 llvm::Value *NumOfElements = nullptr;
4232 unsigned NumDependencies = std::accumulate(
4233 Dependencies.begin(), Dependencies.end(), 0,
4234 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4235 return D.DepKind == OMPC_DEPEND_depobj
4236 ? V
4237 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4238 });
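// NumDependencies counts only plain items: e.g. for
//   depend(in: a, b) depend(depobj: o) depend(iterator(i=0:n), out: c[i])
// it is 2; the depobj and iterator contributions are only known at run time
// and are accumulated separately below.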
4239 QualType FlagsTy;
4240 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241 bool HasDepobjDeps = false;
4242 bool HasRegularWithIterators = false;
4243 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4244 llvm::Value *NumOfRegularWithIterators =
4245 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4246 // Calculate number of depobj dependencies and regular deps with the
4247 // iterators.
4248 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4249 if (D.DepKind == OMPC_DEPEND_depobj) {
4250 SmallVector<llvm::Value *, 4> Sizes =
4251 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4252 for (llvm::Value *Size : Sizes) {
4253 NumOfDepobjElements =
4254 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4255 }
4256 HasDepobjDeps = true;
4257 continue;
4258 }
4259 // Include number of iterations, if any.
4260
4261 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4262 llvm::Value *ClauseIteratorSpace =
4263 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4264 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4265 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4266 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4267 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4268 }
4269 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4270 ClauseIteratorSpace,
4271 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4272 NumOfRegularWithIterators =
4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4274 HasRegularWithIterators = true;
4275 continue;
4276 }
4277 }
4278
4279 QualType KmpDependInfoArrayTy;
4280 if (HasDepobjDeps || HasRegularWithIterators) {
4281 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4282 /*isSigned=*/false);
4283 if (HasDepobjDeps) {
4284 NumOfElements =
4285 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4286 }
4287 if (HasRegularWithIterators) {
4288 NumOfElements =
4289 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4290 }
4291 auto *OVE = new (C) OpaqueValueExpr(
4292 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4293 VK_PRValue);
4294 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4295 RValue::get(NumOfElements));
4296 KmpDependInfoArrayTy =
4297 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4298 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4299 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4300 // Properly emit variable-sized array.
4301 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4302 ImplicitParamKind::Other);
4303 CGF.EmitVarDecl(*PD);
4304 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4305 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4306 /*isSigned=*/false);
4307 } else {
4308 KmpDependInfoArrayTy = C.getConstantArrayType(
4309 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4310 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4311 DependenciesArray =
4312 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4313 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4314 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4315 /*isSigned=*/false);
4316 }
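// The array is now populated in three passes: plain dependences first (using
// a static counter), then iterator-expanded dependences (using a runtime
// counter), and finally the contents of any depobj lists are memcpy'ed in.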
4317 unsigned Pos = 0;
4318 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4319 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4320 Dependencies[I].IteratorExpr)
4321 continue;
4322 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4323 DependenciesArray);
4324 }
4325 // Copy regular dependencies with iterators.
4326 LValue PosLVal = CGF.MakeAddrLValue(
4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4329 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4330 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4331 !Dependencies[I].IteratorExpr)
4332 continue;
4333 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4334 DependenciesArray);
4335 }
4336 // Copy final depobj arrays without iterators.
4337 if (HasDepobjDeps) {
4338 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4339 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4340 continue;
4341 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4342 DependenciesArray);
4343 }
4344 }
4345 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4346 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4347 return std::make_pair(NumOfElements, DependenciesArray);
4348 }
4349
4350 Address CGOpenMPRuntime::emitDepobjDependClause(
4351 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4352 SourceLocation Loc) {
4353 if (Dependencies.DepExprs.empty())
4354 return Address::invalid();
4355 // Process list of dependencies.
4356 ASTContext &C = CGM.getContext();
4357 Address DependenciesArray = Address::invalid();
4358 unsigned NumDependencies = Dependencies.DepExprs.size();
4359 QualType FlagsTy;
4360 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4361 RecordDecl *KmpDependInfoRD =
4362 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4363
4364 llvm::Value *Size;
4365 // Define type kmp_depend_info[<Dependencies.size()>];
4366 // For depobj reserve one extra element to store the number of elements.
4367 // It is required to handle the 'depobj(x) update(in)' construct.
4368 // kmp_depend_info[<Dependencies.size()>] deps;
4369 llvm::Value *NumDepsVal;
4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4371 if (const auto *IE =
4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4378 }
4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4380 NumDepsVal);
4381 CharUnits SizeInBytes =
4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4385 NumDepsVal =
4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4387 } else {
4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4392 Size = CGM.getSize(Sz.alignTo(Align));
4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4394 }
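// In both branches the byte size covers NumDeps + 1 records: the extra
// leading record is the bookkeeping slot that will hold the element count
// (see getDepobjElements).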
4395 // Need to allocate in dynamic memory.
4396 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4397 // Use default allocator.
4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4399 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4400
4401 llvm::Value *Addr =
4402 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4403 CGM.getModule(), OMPRTL___kmpc_alloc),
4404 Args, ".dep.arr.addr");
4405 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4406 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4407 Addr, KmpDependInfoLlvmTy->getPointerTo());
4408 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4409 // Write number of elements in the first element of array for depobj.
4410 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4411 // deps[0].base_addr = NumDependencies;
4412 LValue BaseAddrLVal = CGF.EmitLValueForField(
4413 Base,
4414 *std::next(KmpDependInfoRD->field_begin(),
4415 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4416 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4417 llvm::PointerUnion<unsigned *, LValue *> Pos;
4418 unsigned Idx = 1;
4419 LValue PosLVal;
4420 if (Dependencies.IteratorExpr) {
4421 PosLVal = CGF.MakeAddrLValue(
4422 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4423 C.getSizeType());
4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4425 /*IsInit=*/true);
4426 Pos = &PosLVal;
4427 } else {
4428 Pos = &Idx;
4429 }
4430 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4431 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4432 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4433 CGF.Int8Ty);
4434 return DependenciesArray;
4435 }
4436
4437 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4438 SourceLocation Loc) {
4439 ASTContext &C = CGM.getContext();
4440 QualType FlagsTy;
4441 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4442 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4443 C.VoidPtrTy.castAs<PointerType>());
4444 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4445 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4446 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4447 CGF.ConvertTypeForMem(KmpDependInfoTy));
4448 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4449 Addr.getElementType(), Addr.emitRawPointer(CGF),
4450 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4451 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4452 CGF.VoidPtrTy);
4453 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4454 // Use default allocator.
4455 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4456 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4457
4458 // __kmpc_free(gtid, addr, nullptr); addr points at the bookkeeping slot deps[-1].
4459 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4460 CGM.getModule(), OMPRTL___kmpc_free),
4461 Args);
4462 }
4463
4464 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4465 OpenMPDependClauseKind NewDepKind,
4466 SourceLocation Loc) {
4467 ASTContext &C = CGM.getContext();
4468 QualType FlagsTy;
4469 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4470 RecordDecl *KmpDependInfoRD =
4471 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4472 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4473 llvm::Value *NumDeps;
4474 LValue Base;
4475 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4476
4477 Address Begin = Base.getAddress();
4478 // Cast from pointer to array type to pointer to single element.
4479 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4480 Begin.emitRawPointer(CGF), NumDeps);
4481 // The basic structure here is a post-checked (do-while) loop over the elements.
4482 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4483 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4484 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4485 CGF.EmitBlock(BodyBB);
4486 llvm::PHINode *ElementPHI =
4487 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4488 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4489 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4490 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4491 Base.getTBAAInfo());
4492 // deps[i].flags = NewDepKind;
4493 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4494 LValue FlagsLVal = CGF.EmitLValueForField(
4495 Base, *std::next(KmpDependInfoRD->field_begin(),
4496 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4497 CGF.EmitStoreOfScalar(
4498 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4499 FlagsLVal);
4500
4501 // Shift the address forward by one element.
4502 llvm::Value *ElementNext =
4503 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4504 .emitRawPointer(CGF);
4505 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4506 llvm::Value *IsEmpty =
4507 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4508 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4509 // Done.
4510 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4511 }
4512
4513 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4514 const OMPExecutableDirective &D,
4515 llvm::Function *TaskFunction,
4516 QualType SharedsTy, Address Shareds,
4517 const Expr *IfCond,
4518 const OMPTaskDataTy &Data) {
4519 if (!CGF.HaveInsertPoint())
4520 return;
4521
4522 TaskResultTy Result =
4523 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4524 llvm::Value *NewTask = Result.NewTask;
4525 llvm::Function *TaskEntry = Result.TaskEntry;
4526 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4527 LValue TDBase = Result.TDBase;
4528 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4529 // Process list of dependences.
4530 Address DependenciesArray = Address::invalid();
4531 llvm::Value *NumOfElements;
4532 std::tie(NumOfElements, DependenciesArray) =
4533 emitDependClause(CGF, Data.Dependences, Loc);
4534
4535 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4536 // libcall.
4537 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4538 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4539 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
4540 // dependence list is not empty.
4541 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4542 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4543 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4544 llvm::Value *DepTaskArgs[7];
4545 if (!Data.Dependences.empty()) {
4546 DepTaskArgs[0] = UpLoc;
4547 DepTaskArgs[1] = ThreadID;
4548 DepTaskArgs[2] = NewTask;
4549 DepTaskArgs[3] = NumOfElements;
4550 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4551 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4552 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4553 }
4554 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4555 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4556 if (!Data.Tied) {
4557 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4558 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4559 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4560 }
4561 if (!Data.Dependences.empty()) {
4562 CGF.EmitRuntimeCall(
4563 OMPBuilder.getOrCreateRuntimeFunction(
4564 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4565 DepTaskArgs);
4566 } else {
4567 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4568 CGM.getModule(), OMPRTL___kmpc_omp_task),
4569 TaskArgs);
4570 }
4571 // Check if the parent region is untied and build the return for an untied task.
4572 if (auto *Region =
4573 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4574 Region->emitUntiedSwitch(CGF);
4575 };
4576
4577 llvm::Value *DepWaitTaskArgs[7];
4578 if (!Data.Dependences.empty()) {
4579 DepWaitTaskArgs[0] = UpLoc;
4580 DepWaitTaskArgs[1] = ThreadID;
4581 DepWaitTaskArgs[2] = NumOfElements;
4582 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4583 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4584 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585 DepWaitTaskArgs[6] =
4586 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4587 }
4588 auto &M = CGM.getModule();
4589 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4590 TaskEntry, &Data, &DepWaitTaskArgs,
4591 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4592 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4593 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4594 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4595 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4596 // is specified.
4597 if (!Data.Dependences.empty())
4598 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4599 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4600 DepWaitTaskArgs);
4601 // Call proxy_task_entry(gtid, new_task);
4602 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4603 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4604 Action.Enter(CGF);
4605 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4606 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4607 OutlinedFnArgs);
4608 };
4609
4610 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4611 // kmp_task_t *new_task);
4612 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4613 // kmp_task_t *new_task);
4614 RegionCodeGenTy RCG(CodeGen);
4615 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4616 M, OMPRTL___kmpc_omp_task_begin_if0),
4617 TaskArgs,
4618 OMPBuilder.getOrCreateRuntimeFunction(
4619 M, OMPRTL___kmpc_omp_task_complete_if0),
4620 TaskArgs);
4621 RCG.setAction(Action);
4622 RCG(CGF);
4623 };
4624
4625 if (IfCond) {
4626 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4627 } else {
4628 RegionCodeGenTy ThenRCG(ThenCodeGen);
4629 ThenRCG(CGF);
4630 }
4631 }
4632
4633 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4634 const OMPLoopDirective &D,
4635 llvm::Function *TaskFunction,
4636 QualType SharedsTy, Address Shareds,
4637 const Expr *IfCond,
4638 const OMPTaskDataTy &Data) {
4639 if (!CGF.HaveInsertPoint())
4640 return;
4641 TaskResultTy Result =
4642 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4643 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4644 // libcall.
4645 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4646 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4647 // sched, kmp_uint64 grainsize, void *task_dup);
4648 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4649 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4650 llvm::Value *IfVal;
4651 if (IfCond) {
4652 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4653 /*isSigned=*/true);
4654 } else {
4655 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4656 }
4657
4658 LValue LBLVal = CGF.EmitLValueForField(
4659 Result.TDBase,
4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4661 const auto *LBVar =
4662 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4663 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4664 /*IsInitializer=*/true);
4665 LValue UBLVal = CGF.EmitLValueForField(
4666 Result.TDBase,
4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4668 const auto *UBVar =
4669 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4670 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4671 /*IsInitializer=*/true);
4672 LValue StLVal = CGF.EmitLValueForField(
4673 Result.TDBase,
4674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4675 const auto *StVar =
4676 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4677 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4678 /*IsInitializer=*/true);
4679 // Store reductions address.
4680 LValue RedLVal = CGF.EmitLValueForField(
4681 Result.TDBase,
4682 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4683 if (Data.Reductions) {
4684 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4685 } else {
4686 CGF.EmitNullInitialization(RedLVal.getAddress(),
4687 CGF.getContext().VoidPtrTy);
4688 }
4689 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
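// Schedule encoding below (Schedule.getInt() marks a num_tasks clause), e.g.:
//   grainsize(4) -> sched = Grainsize (1),  grainsize = 4
//   num_tasks(8) -> sched = NumTasks (2),   grainsize = 8
//   neither      -> sched = NoSchedule (0), grainsize = 0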
4690 llvm::Value *TaskArgs[] = {
4691 UpLoc,
4692 ThreadID,
4693 Result.NewTask,
4694 IfVal,
4695 LBLVal.getPointer(CGF),
4696 UBLVal.getPointer(CGF),
4697 CGF.EmitLoadOfScalar(StLVal, Loc),
4698 llvm::ConstantInt::getSigned(
4699 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4700 llvm::ConstantInt::getSigned(
4701 CGF.IntTy, Data.Schedule.getPointer()
4702 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4703 : NoSchedule),
4704 Data.Schedule.getPointer()
4705 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4706 /*isSigned=*/false)
4707 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4708 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4709 Result.TaskDupFn, CGF.VoidPtrTy)
4710 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4711 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4712 CGM.getModule(), OMPRTL___kmpc_taskloop),
4713 TaskArgs);
4714 }
4715
4716 /// Emit the reduction operation for each element of an array (required for
4717 /// array sections): LHS op = RHS.
4718 /// \param Type Type of array.
4719 /// \param LHSVar Variable on the left side of the reduction operation
4720 /// (references element of array in original variable).
4721 /// \param RHSVar Variable on the right side of the reduction operation
4722 /// (references element of array in original variable).
4723 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4724 /// RHSVar.
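/// Roughly:
///   for (i = 0; i != NumElements; ++i)
///     RedOpGen(CGF, XExpr, EExpr, UpExpr); // LHSVar/RHSVar privatized to
///                                          // the i-th elements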
4725 static void EmitOMPAggregateReduction(
4726 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4727 const VarDecl *RHSVar,
4728 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4729 const Expr *, const Expr *)> &RedOpGen,
4730 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4731 const Expr *UpExpr = nullptr) {
4732 // Perform the reduction element by element.
4733 QualType ElementTy;
4734 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4735 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4736
4737 // Drill down to the base element type on both arrays.
4738 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4739 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4740
4741 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4742 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4743 // Cast from pointer to array type to pointer to single element.
4744 llvm::Value *LHSEnd =
4745 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4746 // The basic structure here is a while-do loop.
4747 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4748 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4749 llvm::Value *IsEmpty =
4750 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4751 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4752
4753 // Enter the loop body, making that address the current address.
4754 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4755 CGF.EmitBlock(BodyBB);
4756
4757 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4758
4759 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4760 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4761 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4762 Address RHSElementCurrent(
4763 RHSElementPHI, RHSAddr.getElementType(),
4764 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4765
4766 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4767 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4768 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4769 Address LHSElementCurrent(
4770 LHSElementPHI, LHSAddr.getElementType(),
4771 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4772
4773 // Emit copy.
4774 CodeGenFunction::OMPPrivateScope Scope(CGF);
4775 Scope.addPrivate(LHSVar, LHSElementCurrent);
4776 Scope.addPrivate(RHSVar, RHSElementCurrent);
4777 Scope.Privatize();
4778 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4779 Scope.ForceCleanup();
4780
4781 // Shift the address forward by one element.
4782 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4783 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4784 "omp.arraycpy.dest.element");
4785 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4786 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4787 "omp.arraycpy.src.element");
4788 // Check whether we've reached the end.
4789 llvm::Value *Done =
4790 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4791 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4792 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4793 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4794
4795 // Done.
4796 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4797 }
4798
4799 /// Emit reduction combiner. If the combiner is a simple expression, emit it
4800 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
4801 /// call to the UDR combiner function.
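/// For 'declare reduction', Sema models the combiner as a CallExpr whose
/// callee is an OpaqueValueExpr; that opaque callee is bound here to the
/// outlined UDR combiner function before the call is emitted.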
4802 static void emitReductionCombiner(CodeGenFunction &CGF,
4803 const Expr *ReductionOp) {
4804 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4805 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4806 if (const auto *DRE =
4807 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4808 if (const auto *DRD =
4809 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4810 std::pair<llvm::Function *, llvm::Function *> Reduction =
4811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4812 RValue Func = RValue::get(Reduction.first);
4813 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4814 CGF.EmitIgnoredExpr(ReductionOp);
4815 return;
4816 }
4817 CGF.EmitIgnoredExpr(ReductionOp);
4818 }
4819
4820 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4821 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4822 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4823 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4824 ASTContext &C = CGM.getContext();
4825
4826 // void reduction_func(void *LHSArg, void *RHSArg);
4827 FunctionArgList Args;
4828 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4829 ImplicitParamKind::Other);
4830 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4831 ImplicitParamKind::Other);
4832 Args.push_back(&LHSArg);
4833 Args.push_back(&RHSArg);
4834 const auto &CGFI =
4835 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4836 std::string Name = getReductionFuncName(ReducerName);
4837 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4838 llvm::GlobalValue::InternalLinkage, Name,
4839 &CGM.getModule());
4840 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4841 Fn->setDoesNotRecurse();
4842 CodeGenFunction CGF(CGM);
4843 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4844
4845 // Dst = (void*[n])(LHSArg);
4846 // Src = (void*[n])(RHSArg);
4847 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4848 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4849 ArgsElemType->getPointerTo()),
4850 ArgsElemType, CGF.getPointerAlign());
4851 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4852 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4853 ArgsElemType->getPointerTo()),
4854 ArgsElemType, CGF.getPointerAlign());
4855
4856 // ...
4857 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4858 // ...
4859 CodeGenFunction::OMPPrivateScope Scope(CGF);
4860 const auto *IPriv = Privates.begin();
4861 unsigned Idx = 0;
4862 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4863 const auto *RHSVar =
4864 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4865 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4866 const auto *LHSVar =
4867 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4868 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4869 QualType PrivTy = (*IPriv)->getType();
4870 if (PrivTy->isVariablyModifiedType()) {
4871 // Get array size and emit VLA type.
4872 ++Idx;
4873 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4874 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4875 const VariableArrayType *VLA =
4876 CGF.getContext().getAsVariableArrayType(PrivTy);
4877 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4878 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4879 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4880 CGF.EmitVariablyModifiedType(PrivTy);
4881 }
4882 }
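// A variably modified private type occupies two consecutive slots in the
// exchange array: the element pointer followed by the VLA size, which is
// re-bound to the VLA's size expression before the type is re-emitted.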
4883 Scope.Privatize();
4884 IPriv = Privates.begin();
4885 const auto *ILHS = LHSExprs.begin();
4886 const auto *IRHS = RHSExprs.begin();
4887 for (const Expr *E : ReductionOps) {
4888 if ((*IPriv)->getType()->isArrayType()) {
4889 // Emit reduction for array section.
4890 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4891 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4892 EmitOMPAggregateReduction(
4893 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4894 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4895 emitReductionCombiner(CGF, E);
4896 });
4897 } else {
4898 // Emit reduction for array subscript or single variable.
4899 emitReductionCombiner(CGF, E);
4900 }
4901 ++IPriv;
4902 ++ILHS;
4903 ++IRHS;
4904 }
4905 Scope.ForceCleanup();
4906 CGF.FinishFunction();
4907 return Fn;
4908 }
4909
4910 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4911 const Expr *ReductionOp,
4912 const Expr *PrivateRef,
4913 const DeclRefExpr *LHS,
4914 const DeclRefExpr *RHS) {
4915 if (PrivateRef->getType()->isArrayType()) {
4916 // Emit reduction for array section.
4917 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4918 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4919 EmitOMPAggregateReduction(
4920 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4921 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4922 emitReductionCombiner(CGF, ReductionOp);
4923 });
4924 } else {
4925 // Emit reduction for array subscript or single variable.
4926 emitReductionCombiner(CGF, ReductionOp);
4927 }
4928 }
4929
4930 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4931 ArrayRef<const Expr *> Privates,
4932 ArrayRef<const Expr *> LHSExprs,
4933 ArrayRef<const Expr *> RHSExprs,
4934 ArrayRef<const Expr *> ReductionOps,
4935 ReductionOptionsTy Options) {
4936 if (!CGF.HaveInsertPoint())
4937 return;
4938
4939 bool WithNowait = Options.WithNowait;
4940 bool SimpleReduction = Options.SimpleReduction;
4941
4942 // The following code is emitted for a reduction:
4943 //
4944 // static kmp_critical_name lock = { 0 };
4945 //
4946 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4947 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4948 // ...
4949 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4950 // *(Type<n>-1*)rhs[<n>-1]);
4951 // }
4952 //
4953 // ...
4954 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4955 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4956 // RedList, reduce_func, &<lock>)) {
4957 // case 1:
4958 // ...
4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4960 // ...
4961 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4962 // break;
4963 // case 2:
4964 // ...
4965 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4966 // ...
4967 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4968 // break;
4969 // default:;
4970 // }
4971 //
4972 // If SimpleReduction is true, only the following code is generated:
4973 // ...
4974 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4975 // ...
4976
4977 ASTContext &C = CGM.getContext();
4978
4979 if (SimpleReduction) {
4980 CodeGenFunction::RunCleanupsScope Scope(CGF);
4981 const auto *IPriv = Privates.begin();
4982 const auto *ILHS = LHSExprs.begin();
4983 const auto *IRHS = RHSExprs.begin();
4984 for (const Expr *E : ReductionOps) {
4985 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4986 cast<DeclRefExpr>(*IRHS));
4987 ++IPriv;
4988 ++ILHS;
4989 ++IRHS;
4990 }
4991 return;
4992 }
4993
4994 // 1. Build a list of reduction variables.
4995 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4996 auto Size = RHSExprs.size();
4997 for (const Expr *E : Privates) {
4998 if (E->getType()->isVariablyModifiedType())
4999 // Reserve place for array size.
5000 ++Size;
5001 }
5002 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5003 QualType ReductionArrayTy = C.getConstantArrayType(
5004 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5005 /*IndexTypeQuals=*/0);
5006 RawAddress ReductionList =
5007 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5008 const auto *IPriv = Privates.begin();
5009 unsigned Idx = 0;
5010 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5011 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5012 CGF.Builder.CreateStore(
5013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5014 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5015 Elem);
5016 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5017 // Store array size.
5018 ++Idx;
5019 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5020 llvm::Value *Size = CGF.Builder.CreateIntCast(
5021 CGF.getVLASize(
5022 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5023 .NumElts,
5024 CGF.SizeTy, /*isSigned=*/false);
5025 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5026 Elem);
5027 }
5028 }
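// E.g. for 'reduction(+: x, vla)' RedList is { &x_rhs, &vla_rhs,
// (void *)vla_num_elements }: each variably modified entry is followed by
// its element count stored as a pointer-sized integer.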
5029
5030 // 2. Emit reduce_func().
5031 llvm::Function *ReductionFn = emitReductionFunction(
5032 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5033 Privates, LHSExprs, RHSExprs, ReductionOps);
5034
5035 // 3. Create static kmp_critical_name lock = { 0 };
5036 std::string Name = getName({"reduction"});
5037 llvm::Value *Lock = getCriticalRegionLock(Name);
5038
5039 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5040 // RedList, reduce_func, &<lock>);
5041 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5042 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5043 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5044 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045 ReductionList.getPointer(), CGF.VoidPtrTy);
5046 llvm::Value *Args[] = {
5047 IdentTLoc, // ident_t *<loc>
5048 ThreadId, // i32 <gtid>
5049 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5050 ReductionArrayTySize, // size_type sizeof(RedList)
5051 RL, // void *RedList
5052 ReductionFn, // void (*) (void *, void *) <reduce_func>
5053 Lock // kmp_critical_name *&<lock>
5054 };
5055 llvm::Value *Res = CGF.EmitRuntimeCall(
5056 OMPBuilder.getOrCreateRuntimeFunction(
5057 CGM.getModule(),
5058 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5059 Args);
5060
5061 // 5. Build switch(res)
5062 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5063 llvm::SwitchInst *SwInst =
5064 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5065
5066 // 6. Build case 1:
5067 // ...
5068 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5069 // ...
5070 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5071 // break;
5072 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5073 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5074 CGF.EmitBlock(Case1BB);
5075
5076 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5077 llvm::Value *EndArgs[] = {
5078 IdentTLoc, // ident_t *<loc>
5079 ThreadId, // i32 <gtid>
5080 Lock // kmp_critical_name *&<lock>
5081 };
5082 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5083 CodeGenFunction &CGF, PrePostActionTy &Action) {
5084 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5085 const auto *IPriv = Privates.begin();
5086 const auto *ILHS = LHSExprs.begin();
5087 const auto *IRHS = RHSExprs.begin();
5088 for (const Expr *E : ReductionOps) {
5089 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5090 cast<DeclRefExpr>(*IRHS));
5091 ++IPriv;
5092 ++ILHS;
5093 ++IRHS;
5094 }
5095 };
5096 RegionCodeGenTy RCG(CodeGen);
5097 CommonActionTy Action(
5098 nullptr, std::nullopt,
5099 OMPBuilder.getOrCreateRuntimeFunction(
5100 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5101 : OMPRTL___kmpc_end_reduce),
5102 EndArgs);
5103 RCG.setAction(Action);
5104 RCG(CGF);
5105
5106 CGF.EmitBranch(DefaultBB);
5107
5108 // 7. Build case 2:
5109 // ...
5110 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5111 // ...
5112 // break;
5113 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5114 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5115 CGF.EmitBlock(Case2BB);
5116
5117 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5118 CodeGenFunction &CGF, PrePostActionTy &Action) {
5119 const auto *ILHS = LHSExprs.begin();
5120 const auto *IRHS = RHSExprs.begin();
5121 const auto *IPriv = Privates.begin();
5122 for (const Expr *E : ReductionOps) {
5123 const Expr *XExpr = nullptr;
5124 const Expr *EExpr = nullptr;
5125 const Expr *UpExpr = nullptr;
5126 BinaryOperatorKind BO = BO_Comma;
5127 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5128 if (BO->getOpcode() == BO_Assign) {
5129 XExpr = BO->getLHS();
5130 UpExpr = BO->getRHS();
5131 }
5132 }
5133 // Try to emit update expression as a simple atomic.
5134 const Expr *RHSExpr = UpExpr;
5135 if (RHSExpr) {
5136 // Analyze RHS part of the whole expression.
5137 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5138 RHSExpr->IgnoreParenImpCasts())) {
5139 // If this is a conditional operator, analyze its condition for
5140 // min/max reduction operator.
5141 RHSExpr = ACO->getCond();
5142 }
5143 if (const auto *BORHS =
5144 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5145 EExpr = BORHS->getRHS();
5146 BO = BORHS->getOpcode();
5147 }
5148 }
5149 if (XExpr) {
5150 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5151 auto &&AtomicRedGen = [BO, VD,
5152 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5153 const Expr *EExpr, const Expr *UpExpr) {
5154 LValue X = CGF.EmitLValue(XExpr);
5155 RValue E;
5156 if (EExpr)
5157 E = CGF.EmitAnyExpr(EExpr);
5158 CGF.EmitOMPAtomicSimpleUpdateExpr(
5159 X, E, BO, /*IsXLHSInRHSPart=*/true,
5160 llvm::AtomicOrdering::Monotonic, Loc,
5161 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5162 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5163 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5164 CGF.emitOMPSimpleStore(
5165 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5166 VD->getType().getNonReferenceType(), Loc);
5167 PrivateScope.addPrivate(VD, LHSTemp);
5168 (void)PrivateScope.Privatize();
5169 return CGF.EmitAnyExpr(UpExpr);
5170 });
5171 };
5172 if ((*IPriv)->getType()->isArrayType()) {
5173 // Emit atomic reduction for array section.
5174 const auto *RHSVar =
5175 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5176 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5177 AtomicRedGen, XExpr, EExpr, UpExpr);
5178 } else {
5179 // Emit atomic reduction for array subscript or single variable.
5180 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5181 }
5182 } else {
5183 // Emit as a critical region.
5184 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5185 const Expr *, const Expr *) {
5186 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5187 std::string Name = RT.getName({"atomic_reduction"});
5188 RT.emitCriticalRegion(
5189 CGF, Name,
5190 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5191 Action.Enter(CGF);
5192 emitReductionCombiner(CGF, E);
5193 },
5194 Loc);
5195 };
5196 if ((*IPriv)->getType()->isArrayType()) {
5197 const auto *LHSVar =
5198 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5199 const auto *RHSVar =
5200 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5201 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5202 CritRedGen);
5203 } else {
5204 CritRedGen(CGF, nullptr, nullptr, nullptr);
5205 }
5206 }
5207 ++ILHS;
5208 ++IRHS;
5209 ++IPriv;
5210 }
5211 };
5212 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5213 if (!WithNowait) {
5214 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5215 llvm::Value *EndArgs[] = {
5216 IdentTLoc, // ident_t *<loc>
5217 ThreadId, // i32 <gtid>
5218 Lock // kmp_critical_name *&<lock>
5219 };
5220 CommonActionTy Action(nullptr, std::nullopt,
5221 OMPBuilder.getOrCreateRuntimeFunction(
5222 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5223 EndArgs);
5224 AtomicRCG.setAction(Action);
5225 AtomicRCG(CGF);
5226 } else {
5227 AtomicRCG(CGF);
5228 }
5229
5230 CGF.EmitBranch(DefaultBB);
5231 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5232 }
5233
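// A rough sketch (illustrative pseudo-IR, not the exact output): for
// '#pragma omp parallel for reduction(+ : s)' the code above produces
// \code
//   %res = __kmpc_reduce(<loc>, <gtid>, 1, sizeof(RedList), RedList,
//                        reduce_func, &<lock>)
//   switch (%res) {
//   case 1: // combine directly, then release the runtime lock.
//     *lhs = *lhs + *rhs;
//     __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
//     break;
//   case 2: // combine with a per-item atomic update instead.
//     atomic { *lhs = *lhs + *rhs; }
//     break;
//   default:; // nothing to do.
//   }
// \endcode
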
5234 /// Generates a unique name for artificial threadprivate variables.
5235 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5236 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5237 const Expr *Ref) {
5238 SmallString<256> Buffer;
5239 llvm::raw_svector_ostream Out(Buffer);
5240 const clang::DeclRefExpr *DE;
5241 const VarDecl *D = ::getBaseDecl(Ref, DE);
5242 if (!D)
5243 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5244 D = D->getCanonicalDecl();
5245 std::string Name = CGM.getOpenMPRuntime().getName(
5246 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5247 Out << Prefix << Name << "_"
5248 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5249 return std::string(Out.str());
5250 }
5251
5252 /// Emits reduction initializer function:
5253 /// \code
5254 /// void @.red_init(void* %arg, void* %orig) {
5255 /// %0 = bitcast void* %arg to <type>*
5256 /// store <type> <init>, <type>* %0
5257 /// ret void
5258 /// }
5259 /// \endcode
5260 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5261 SourceLocation Loc,
5262 ReductionCodeGen &RCG, unsigned N) {
5263 ASTContext &C = CGM.getContext();
5264 QualType VoidPtrTy = C.VoidPtrTy;
5265 VoidPtrTy.addRestrict();
5266 FunctionArgList Args;
5267 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5268 ImplicitParamKind::Other);
5269 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5270 ImplicitParamKind::Other);
5271 Args.emplace_back(&Param);
5272 Args.emplace_back(&ParamOrig);
5273 const auto &FnInfo =
5274 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5275 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5276 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5277 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5278 Name, &CGM.getModule());
5279 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5280 Fn->setDoesNotRecurse();
5281 CodeGenFunction CGF(CGM);
5282 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5283 QualType PrivateType = RCG.getPrivateType(N);
5284 Address PrivateAddr = CGF.EmitLoadOfPointer(
5285 CGF.GetAddrOfLocalVar(&Param).withElementType(
5286 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5287 C.getPointerType(PrivateType)->castAs<PointerType>());
5288 llvm::Value *Size = nullptr;
5289   // If the size of the reduction item is non-constant, load it from the
5290   // global threadprivate variable.
5291 if (RCG.getSizes(N).second) {
5292 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5293 CGF, CGM.getContext().getSizeType(),
5294 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5295 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5296 CGM.getContext().getSizeType(), Loc);
5297 }
5298 RCG.emitAggregateType(CGF, N, Size);
5299 Address OrigAddr = Address::invalid();
5300   // If the initializer comes from a declare reduction construct, emit a
5301   // pointer to the address of the original reduction item (required by the
5302   // reduction initializer).
5303 if (RCG.usesReductionInitializer(N)) {
5304 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5305 OrigAddr = CGF.EmitLoadOfPointer(
5306 SharedAddr,
5307 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5308 }
5309 // Emit the initializer:
5310 // %0 = bitcast void* %arg to <type>*
5311 // store <type> <init>, <type>* %0
5312 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5313 [](CodeGenFunction &) { return false; });
5314 CGF.FinishFunction();
5315 return Fn;
5316 }
5317
5318 /// Emits reduction combiner function:
5319 /// \code
5320 /// void @.red_comb(void* %arg0, void* %arg1) {
5321 /// %lhs = bitcast void* %arg0 to <type>*
5322 /// %rhs = bitcast void* %arg1 to <type>*
5323 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5324 /// store <type> %2, <type>* %lhs
5325 /// ret void
5326 /// }
5327 /// \endcode
5328 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5329 SourceLocation Loc,
5330 ReductionCodeGen &RCG, unsigned N,
5331 const Expr *ReductionOp,
5332 const Expr *LHS, const Expr *RHS,
5333 const Expr *PrivateRef) {
5334 ASTContext &C = CGM.getContext();
5335 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5336 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5337 FunctionArgList Args;
5338 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5339 C.VoidPtrTy, ImplicitParamKind::Other);
5340 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5341 ImplicitParamKind::Other);
5342 Args.emplace_back(&ParamInOut);
5343 Args.emplace_back(&ParamIn);
5344 const auto &FnInfo =
5345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5346 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5347 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5348 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5349 Name, &CGM.getModule());
5350 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5351 Fn->setDoesNotRecurse();
5352 CodeGenFunction CGF(CGM);
5353 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5354 llvm::Value *Size = nullptr;
5355   // If the size of the reduction item is non-constant, load it from the
5356   // global threadprivate variable.
5357 if (RCG.getSizes(N).second) {
5358 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5359 CGF, CGM.getContext().getSizeType(),
5360 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5361 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5362 CGM.getContext().getSizeType(), Loc);
5363 }
5364 RCG.emitAggregateType(CGF, N, Size);
5365 // Remap lhs and rhs variables to the addresses of the function arguments.
5366 // %lhs = bitcast void* %arg0 to <type>*
5367 // %rhs = bitcast void* %arg1 to <type>*
5368 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5369 PrivateScope.addPrivate(
5370 LHSVD,
5371 // Pull out the pointer to the variable.
5372 CGF.EmitLoadOfPointer(
5373 CGF.GetAddrOfLocalVar(&ParamInOut)
5374 .withElementType(
5375 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5376 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5377 PrivateScope.addPrivate(
5378 RHSVD,
5379 // Pull out the pointer to the variable.
5380 CGF.EmitLoadOfPointer(
5381 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5382 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5383 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5384 PrivateScope.Privatize();
5385 // Emit the combiner body:
5386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5387 // store <type> %2, <type>* %lhs
5388 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5389 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5390 cast<DeclRefExpr>(RHS));
5391 CGF.FinishFunction();
5392 return Fn;
5393 }
5394
5395 /// Emits reduction finalizer function:
5396 /// \code
5397 /// void @.red_fini(void* %arg) {
5398 /// %0 = bitcast void* %arg to <type>*
5399 /// <destroy>(<type>* %0)
5400 /// ret void
5401 /// }
5402 /// \endcode
5403 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5404 SourceLocation Loc,
5405 ReductionCodeGen &RCG, unsigned N) {
5406 if (!RCG.needCleanups(N))
5407 return nullptr;
5408 ASTContext &C = CGM.getContext();
5409 FunctionArgList Args;
5410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5411 ImplicitParamKind::Other);
5412 Args.emplace_back(&Param);
5413 const auto &FnInfo =
5414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5415 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5416 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5417 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5418 Name, &CGM.getModule());
5419 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5420 Fn->setDoesNotRecurse();
5421 CodeGenFunction CGF(CGM);
5422 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5423 Address PrivateAddr = CGF.EmitLoadOfPointer(
5424 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5425 llvm::Value *Size = nullptr;
5426   // If the size of the reduction item is non-constant, load it from the
5427   // global threadprivate variable.
5428 if (RCG.getSizes(N).second) {
5429 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5430 CGF, CGM.getContext().getSizeType(),
5431 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5432 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5433 CGM.getContext().getSizeType(), Loc);
5434 }
5435 RCG.emitAggregateType(CGF, N, Size);
5436 // Emit the finalizer body:
5437 // <destroy>(<type>* %0)
5438 RCG.emitCleanups(CGF, N, PrivateAddr);
5439 CGF.FinishFunction(Loc);
5440 return Fn;
5441 }
5442
5443 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5444 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5445 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5446 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5447 return nullptr;
5448
5449 // Build typedef struct:
5450 // kmp_taskred_input {
5451 // void *reduce_shar; // shared reduction item
5452 // void *reduce_orig; // original reduction item used for initialization
5453 // size_t reduce_size; // size of data item
5454 // void *reduce_init; // data initialization routine
5455 // void *reduce_fini; // data finalization routine
5456 // void *reduce_comb; // data combiner routine
5457 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5458 // } kmp_taskred_input_t;
5459 ASTContext &C = CGM.getContext();
5460 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5461 RD->startDefinition();
5462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5463 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5464 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5465 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5466 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5467 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5468 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5469 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5470 RD->completeDefinition();
5471 QualType RDType = C.getRecordType(RD);
5472 unsigned Size = Data.ReductionVars.size();
5473 llvm::APInt ArraySize(/*numBits=*/64, Size);
5474 QualType ArrayRDType =
5475 C.getConstantArrayType(RDType, ArraySize, nullptr,
5476 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5477 // kmp_task_red_input_t .rd_input.[Size];
5478 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5479 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5480 Data.ReductionCopies, Data.ReductionOps);
5481 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5482 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5483 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5484 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5485 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5486 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5487 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5488 ".rd_input.gep.");
5489 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5490 // ElemLVal.reduce_shar = &Shareds[Cnt];
5491 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5492 RCG.emitSharedOrigLValue(CGF, Cnt);
5493 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5494 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5495 // ElemLVal.reduce_orig = &Origs[Cnt];
5496 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5497 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5498 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5499 RCG.emitAggregateType(CGF, Cnt);
5500 llvm::Value *SizeValInChars;
5501 llvm::Value *SizeVal;
5502 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5503     // We use delayed creation/initialization for VLAs and array sections. This
5504     // is required because the runtime does not provide a way to pass the sizes
5505     // of VLAs/array sections to the initializer/combiner/finalizer functions.
5506     // Instead, threadprivate global variables are used to store these values,
5507     // and the functions read them from there.
5508 bool DelayedCreation = !!SizeVal;
5509 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5510 /*isSigned=*/false);
5511 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5512 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5513 // ElemLVal.reduce_init = init;
5514 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5515 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5516 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5517 // ElemLVal.reduce_fini = fini;
5518 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5519 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5520 llvm::Value *FiniAddr =
5521 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5522 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5523 // ElemLVal.reduce_comb = comb;
5524 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5525 llvm::Value *CombAddr = emitReduceCombFunction(
5526 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5527 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5528 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5529 // ElemLVal.flags = 0;
5530 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5531 if (DelayedCreation) {
5532 CGF.EmitStoreOfScalar(
5533 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5534 FlagsLVal);
5535 } else
5536 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5537 }
5538 if (Data.IsReductionWithTaskMod) {
5539 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5540 // is_ws, int num, void *data);
5541 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5542 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5543 CGM.IntTy, /*isSigned=*/true);
5544 llvm::Value *Args[] = {
5545 IdentTLoc, GTid,
5546 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5547 /*isSigned=*/true),
5548 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5550 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(
5552 OMPBuilder.getOrCreateRuntimeFunction(
5553 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5554 Args);
5555 }
5556 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5557 llvm::Value *Args[] = {
5558 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5559 /*isSigned=*/true),
5560 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5561 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5562 CGM.VoidPtrTy)};
5563 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5565 Args);
5566 }
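
// A rough usage sketch (assuming the usual lowering): for
// \code
//   #pragma omp taskgroup task_reduction(+ : x)
// \endcode
// this function fills one kmp_taskred_input_t entry for 'x' (shared and
// original addresses, the item size, the .red_init./.red_comb. helpers and
// an optional .red_fini. helper) and returns the taskgroup handle produced
// by __kmpc_taskred_init, or by __kmpc_taskred_modifier_init when a
// reduction modifier is involved.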
5567
5568 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 bool IsWorksharingReduction) {
5571   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
5572   // int gtid, int is_ws);
5573 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5575 CGM.IntTy, /*isSigned=*/true);
5576 llvm::Value *Args[] = {IdentTLoc, GTid,
5577 llvm::ConstantInt::get(CGM.IntTy,
5578 IsWorksharingReduction ? 1 : 0,
5579 /*isSigned=*/true)};
5580 (void)CGF.EmitRuntimeCall(
5581 OMPBuilder.getOrCreateRuntimeFunction(
5582 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5583 Args);
5584 }
5585
5586 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 ReductionCodeGen &RCG,
5589 unsigned N) {
5590 auto Sizes = RCG.getSizes(N);
5591   // Emit a threadprivate global variable if the size is non-constant
5592   // (Sizes.second != nullptr).
5593 if (Sizes.second) {
5594 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5595 /*isSigned=*/false);
5596 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597 CGF, CGM.getContext().getSizeType(),
5598 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5599 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5600 }
5601 }
5602
5603 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604 SourceLocation Loc,
5605 llvm::Value *ReductionsPtr,
5606 LValue SharedLVal) {
5607 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608 // *d);
5609 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5610 CGM.IntTy,
5611 /*isSigned=*/true),
5612 ReductionsPtr,
5613 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5615 return Address(
5616 CGF.EmitRuntimeCall(
5617 OMPBuilder.getOrCreateRuntimeFunction(
5618 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5619 Args),
5620 CGF.Int8Ty, SharedLVal.getAlignment());
5621 }
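
// Sketch (names illustrative): inside a task that participates via
// 'in_reduction(+ : x)', the address of the thread-specific copy of 'x' is
// obtained roughly as
// \code
//   %priv = __kmpc_task_reduction_get_th_data(<gtid>, %tg_handle, &x)
// \endcode
// where %tg_handle is the value returned by emitTaskReductionInit.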
5622
5623 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5624 const OMPTaskDataTy &Data) {
5625 if (!CGF.HaveInsertPoint())
5626 return;
5627
5628 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5629 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5630 OMPBuilder.createTaskwait(CGF.Builder);
5631 } else {
5632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5634 auto &M = CGM.getModule();
5635 Address DependenciesArray = Address::invalid();
5636 llvm::Value *NumOfElements;
5637 std::tie(NumOfElements, DependenciesArray) =
5638 emitDependClause(CGF, Data.Dependences, Loc);
5639 if (!Data.Dependences.empty()) {
5640 llvm::Value *DepWaitTaskArgs[7];
5641 DepWaitTaskArgs[0] = UpLoc;
5642 DepWaitTaskArgs[1] = ThreadID;
5643 DepWaitTaskArgs[2] = NumOfElements;
5644 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5645 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5646 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5647 DepWaitTaskArgs[6] =
5648 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5649
5650 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5651
5652 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5653 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5654 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5655       // kmp_int32 has_no_wait), used when dependence info is specified.
5656 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5657 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5658 DepWaitTaskArgs);
5659
5660 } else {
5661
5662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5663 // global_tid);
5664 llvm::Value *Args[] = {UpLoc, ThreadID};
5665 // Ignore return result until untied tasks are supported.
5666 CGF.EmitRuntimeCall(
5667 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5668 Args);
5669 }
5670 }
5671
5672 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5673 Region->emitUntiedSwitch(CGF);
5674 }
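
// Illustrative lowering: a plain '#pragma omp taskwait' becomes a call to
// __kmpc_omp_taskwait(<loc>, <gtid>) (or an OpenMPIRBuilder-created
// taskwait), while '#pragma omp taskwait depend(in : x)' goes through the
// dependence-aware entry point __kmpc_omp_taskwait_deps_51 with the emitted
// dependence array.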
5675
5676 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677 OpenMPDirectiveKind InnerKind,
5678 const RegionCodeGenTy &CodeGen,
5679 bool HasCancel) {
5680 if (!CGF.HaveInsertPoint())
5681 return;
5682 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683 InnerKind != OMPD_critical &&
5684 InnerKind != OMPD_master &&
5685 InnerKind != OMPD_masked);
5686 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687 }
5688
5689 namespace {
5690 enum RTCancelKind {
5691 CancelNoreq = 0,
5692 CancelParallel = 1,
5693 CancelLoop = 2,
5694 CancelSections = 3,
5695 CancelTaskgroup = 4
5696 };
5697 } // anonymous namespace
5698
5699 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700 RTCancelKind CancelKind = CancelNoreq;
5701 if (CancelRegion == OMPD_parallel)
5702 CancelKind = CancelParallel;
5703 else if (CancelRegion == OMPD_for)
5704 CancelKind = CancelLoop;
5705 else if (CancelRegion == OMPD_sections)
5706 CancelKind = CancelSections;
5707 else {
5708 assert(CancelRegion == OMPD_taskgroup);
5709 CancelKind = CancelTaskgroup;
5710 }
5711 return CancelKind;
5712 }
5713
5714 void CGOpenMPRuntime::emitCancellationPointCall(
5715 CodeGenFunction &CGF, SourceLocation Loc,
5716 OpenMPDirectiveKind CancelRegion) {
5717 if (!CGF.HaveInsertPoint())
5718 return;
5719 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5720 // global_tid, kmp_int32 cncl_kind);
5721 if (auto *OMPRegionInfo =
5722 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5723 // For 'cancellation point taskgroup', the task region info may not have a
5724 // cancel. This may instead happen in another adjacent task.
5725 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5726 llvm::Value *Args[] = {
5727 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5728 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5729 // Ignore return result until untied tasks are supported.
5730 llvm::Value *Result = CGF.EmitRuntimeCall(
5731 OMPBuilder.getOrCreateRuntimeFunction(
5732 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5733 Args);
5734 // if (__kmpc_cancellationpoint()) {
5735 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5736 // exit from construct;
5737 // }
5738 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5739 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5740 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5741 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5742 CGF.EmitBlock(ExitBB);
5743 if (CancelRegion == OMPD_parallel)
5744 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5745 // exit from construct;
5746 CodeGenFunction::JumpDest CancelDest =
5747 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5748 CGF.EmitBranchThroughCleanup(CancelDest);
5749 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5750 }
5751 }
5752 }
5753
5754 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5755 const Expr *IfCond,
5756 OpenMPDirectiveKind CancelRegion) {
5757 if (!CGF.HaveInsertPoint())
5758 return;
5759 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5760 // kmp_int32 cncl_kind);
5761 auto &M = CGM.getModule();
5762 if (auto *OMPRegionInfo =
5763 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5764 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5765 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5766 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5767 llvm::Value *Args[] = {
5768 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5769 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5770 // Ignore return result until untied tasks are supported.
5771 llvm::Value *Result = CGF.EmitRuntimeCall(
5772 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5773 // if (__kmpc_cancel()) {
5774 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5775 // exit from construct;
5776 // }
5777 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5778 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5779 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5780 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5781 CGF.EmitBlock(ExitBB);
5782 if (CancelRegion == OMPD_parallel)
5783 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5784 // exit from construct;
5785 CodeGenFunction::JumpDest CancelDest =
5786 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5787 CGF.EmitBranchThroughCleanup(CancelDest);
5788 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5789 };
5790 if (IfCond) {
5791 emitIfClause(CGF, IfCond, ThenGen,
5792 [](CodeGenFunction &, PrePostActionTy &) {});
5793 } else {
5794 RegionCodeGenTy ThenRCG(ThenGen);
5795 ThenRCG(CGF);
5796 }
5797 }
5798 }
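
// Sketch of the if-clause handling above: for
// '#pragma omp cancel parallel if(c)' the emitted control flow is roughly
// \code
//   if (c) {
//     %res = __kmpc_cancel(<loc>, <gtid>, CancelParallel);
//     if (%res) { __kmpc_cancel_barrier(...); exit from construct; }
//   } // else: cancellation is not requested, fall through.
// \endcode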
5799
5800 namespace {
5801 /// Cleanup action for uses_allocators support.
5802 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804
5805 public:
5806   OMPUsesAllocatorsActionTy(
5807 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808 : Allocators(Allocators) {}
5809   void Enter(CodeGenFunction &CGF) override {
5810 if (!CGF.HaveInsertPoint())
5811 return;
5812 for (const auto &AllocatorData : Allocators) {
5813 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814 CGF, AllocatorData.first, AllocatorData.second);
5815 }
5816 }
5817   void Exit(CodeGenFunction &CGF) override {
5818 if (!CGF.HaveInsertPoint())
5819 return;
5820 for (const auto &AllocatorData : Allocators) {
5821 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822 AllocatorData.first);
5823 }
5824 }
5825 };
5826 } // namespace
5827
5828 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829 const OMPExecutableDirective &D, StringRef ParentName,
5830 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832 assert(!ParentName.empty() && "Invalid target entry parent name!");
5833 HasEmittedTargetRegion = true;
5834 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838 if (!D.AllocatorTraits)
5839 continue;
5840 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5841 }
5842 }
5843 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844 CodeGen.setAction(UsesAllocatorAction);
5845 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846 IsOffloadEntry, CodeGen);
5847 }
5848
5849 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5850 const Expr *Allocator,
5851 const Expr *AllocatorTraits) {
5852 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5853 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5854 // Use default memspace handle.
5855 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5856 llvm::Value *NumTraits = llvm::ConstantInt::get(
5857 CGF.IntTy, cast<ConstantArrayType>(
5858 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5859 ->getSize()
5860 .getLimitedValue());
5861 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5862 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5863 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5864 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5865 AllocatorTraitsLVal.getBaseInfo(),
5866 AllocatorTraitsLVal.getTBAAInfo());
5867 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5868
5869 llvm::Value *AllocatorVal =
5870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5871 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5872 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5873 // Store to allocator.
5874 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5875 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5876 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5877 AllocatorVal =
5878 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5879 Allocator->getType(), Allocator->getExprLoc());
5880 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5881 }
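
// Rough sketch (clause syntax abbreviated): for a target directive with
// 'uses_allocators(my_alloc(my_traits))' the code above emits, roughly,
// \code
//   my_alloc = __kmpc_init_allocator(<gtid>, /*memspace=*/nullptr,
//                                    <num_traits>, &my_traits);
// \endcode
// and emitUsesAllocatorsFini later releases it via
// __kmpc_destroy_allocator(<gtid>, my_alloc).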
5882
5883 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884 const Expr *Allocator) {
5885 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5886 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5887 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5888 llvm::Value *AllocatorVal =
5889 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5890 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5891 CGF.getContext().VoidPtrTy,
5892 Allocator->getExprLoc());
5893 (void)CGF.EmitRuntimeCall(
5894 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5895 OMPRTL___kmpc_destroy_allocator),
5896 {ThreadId, AllocatorVal});
5897 }
5898
5899 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5900 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5901 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5902 int32_t &MaxTeamsVal) {
5903
5904 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5905 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5906 /*UpperBoundOnly=*/true);
5907
5908 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5909 for (auto *A : C->getAttrs()) {
5910 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5911 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5912 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5913 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5914 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5915 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5916 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5917 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5918 &AttrMaxThreadsVal);
5919 else
5920 continue;
5921
5922 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5923 if (AttrMaxThreadsVal > 0)
5924 MaxThreadsVal = MaxThreadsVal > 0
5925 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5926 : AttrMaxThreadsVal;
5927 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5928 if (AttrMaxBlocksVal > 0)
5929 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5930 : AttrMaxBlocksVal;
5931 }
5932 }
5933 }
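
// For instance (a sketch; attribute spelling abbreviated): an
// 'ompx_attribute' clause carrying __launch_bounds__(128, 2) clamps
// MaxThreadsVal to at most 128 and raises MinTeamsVal to at least 2, on top
// of whatever bounds the num_teams/num_threads analysis produced.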
5934
5935 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939
5940 llvm::TargetRegionEntryInfo EntryInfo =
5941 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5942
5943 CodeGenFunction CGF(CGM, true);
5944 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5945 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5946 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5947
5948 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5950 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5951 };
5952
5953 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5954 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5955
5956 if (!OutlinedFn)
5957 return;
5958
5959 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5960
5961 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5962 for (auto *A : C->getAttrs()) {
5963 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5964 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5965 }
5966 }
5967 }
5968
5969 /// Checks if the expression is constant or does not have non-trivial function
5970 /// calls.
5971 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5972 // We can skip constant expressions.
5973 // We can skip expressions with trivial calls or simple expressions.
5974 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5975 !E->hasNonTrivialCall(Ctx)) &&
5976 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977 }
5978
5979 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5980 const Stmt *Body) {
5981 const Stmt *Child = Body->IgnoreContainers();
5982 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5983 Child = nullptr;
5984 for (const Stmt *S : C->body()) {
5985 if (const auto *E = dyn_cast<Expr>(S)) {
5986 if (isTrivial(Ctx, E))
5987 continue;
5988 }
5989 // Some of the statements can be ignored.
5990 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5991 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5992 continue;
5993 // Analyze declarations.
5994 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5995 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5996 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5997 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5998 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5999 isa<UsingDirectiveDecl>(D) ||
6000 isa<OMPDeclareReductionDecl>(D) ||
6001 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6002 return true;
6003 const auto *VD = dyn_cast<VarDecl>(D);
6004 if (!VD)
6005 return false;
6006 return VD->hasGlobalStorage() || !VD->isUsed();
6007 }))
6008 continue;
6009 }
6010 // Found multiple children - cannot get the one child only.
6011 if (Child)
6012 return nullptr;
6013 Child = S;
6014 }
6015 if (Child)
6016 Child = Child->IgnoreContainers();
6017 }
6018 return Child;
6019 }
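
// Example (illustrative): for a captured body such as
// \code
//   { ; int Unused; #pragma omp teams ... }
// \endcode
// the null statement and the unused local declaration are skipped, so the
// teams directive is returned as the single child.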
6020
6021 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6022 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6023 int32_t &MaxTeamsVal) {
6024
6025 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6026 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6027 "Expected target-based executable directive.");
6028 switch (DirectiveKind) {
6029 case OMPD_target: {
6030 const auto *CS = D.getInnermostCapturedStmt();
6031 const auto *Body =
6032 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6033 const Stmt *ChildStmt =
6034 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6035 if (const auto *NestedDir =
6036 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6037 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6038 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6039 const Expr *NumTeams =
6040 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6041 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6042 if (auto Constant =
6043 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6044 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6045 return NumTeams;
6046 }
6047 MinTeamsVal = MaxTeamsVal = 0;
6048 return nullptr;
6049 }
6050 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6051 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6052 MinTeamsVal = MaxTeamsVal = 1;
6053 return nullptr;
6054 }
6055 MinTeamsVal = MaxTeamsVal = 1;
6056 return nullptr;
6057 }
6058     // A value of -1 means that no teams region needs to be emitted.
6059 MinTeamsVal = MaxTeamsVal = -1;
6060 return nullptr;
6061 }
6062 case OMPD_target_teams_loop:
6063 case OMPD_target_teams:
6064 case OMPD_target_teams_distribute:
6065 case OMPD_target_teams_distribute_simd:
6066 case OMPD_target_teams_distribute_parallel_for:
6067 case OMPD_target_teams_distribute_parallel_for_simd: {
6068 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6069 const Expr *NumTeams =
6070 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6071 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6072 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6073 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6074 return NumTeams;
6075 }
6076 MinTeamsVal = MaxTeamsVal = 0;
6077 return nullptr;
6078 }
6079 case OMPD_target_parallel:
6080 case OMPD_target_parallel_for:
6081 case OMPD_target_parallel_for_simd:
6082 case OMPD_target_parallel_loop:
6083 case OMPD_target_simd:
6084 MinTeamsVal = MaxTeamsVal = 1;
6085 return nullptr;
6086 case OMPD_parallel:
6087 case OMPD_for:
6088 case OMPD_parallel_for:
6089 case OMPD_parallel_loop:
6090 case OMPD_parallel_master:
6091 case OMPD_parallel_sections:
6092 case OMPD_for_simd:
6093 case OMPD_parallel_for_simd:
6094 case OMPD_cancel:
6095 case OMPD_cancellation_point:
6096 case OMPD_ordered:
6097 case OMPD_threadprivate:
6098 case OMPD_allocate:
6099 case OMPD_task:
6100 case OMPD_simd:
6101 case OMPD_tile:
6102 case OMPD_unroll:
6103 case OMPD_sections:
6104 case OMPD_section:
6105 case OMPD_single:
6106 case OMPD_master:
6107 case OMPD_critical:
6108 case OMPD_taskyield:
6109 case OMPD_barrier:
6110 case OMPD_taskwait:
6111 case OMPD_taskgroup:
6112 case OMPD_atomic:
6113 case OMPD_flush:
6114 case OMPD_depobj:
6115 case OMPD_scan:
6116 case OMPD_teams:
6117 case OMPD_target_data:
6118 case OMPD_target_exit_data:
6119 case OMPD_target_enter_data:
6120 case OMPD_distribute:
6121 case OMPD_distribute_simd:
6122 case OMPD_distribute_parallel_for:
6123 case OMPD_distribute_parallel_for_simd:
6124 case OMPD_teams_distribute:
6125 case OMPD_teams_distribute_simd:
6126 case OMPD_teams_distribute_parallel_for:
6127 case OMPD_teams_distribute_parallel_for_simd:
6128 case OMPD_target_update:
6129 case OMPD_declare_simd:
6130 case OMPD_declare_variant:
6131 case OMPD_begin_declare_variant:
6132 case OMPD_end_declare_variant:
6133 case OMPD_declare_target:
6134 case OMPD_end_declare_target:
6135 case OMPD_declare_reduction:
6136 case OMPD_declare_mapper:
6137 case OMPD_taskloop:
6138 case OMPD_taskloop_simd:
6139 case OMPD_master_taskloop:
6140 case OMPD_master_taskloop_simd:
6141 case OMPD_parallel_master_taskloop:
6142 case OMPD_parallel_master_taskloop_simd:
6143 case OMPD_requires:
6144 case OMPD_metadirective:
6145 case OMPD_unknown:
6146 break;
6147 default:
6148 break;
6149 }
6150 llvm_unreachable("Unexpected directive kind.");
6151 }
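
// Worked example (assuming constant-foldable clauses): for
// '#pragma omp target teams num_teams(8)' the num_teams expression is
// returned and MinTeamsVal = MaxTeamsVal = 8; for a bare
// '#pragma omp target parallel' there is no expression and
// MinTeamsVal = MaxTeamsVal = 1.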
6152
6153 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6154 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6155 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6156 "Clauses associated with the teams directive expected to be emitted "
6157 "only for the host!");
6158 CGBuilderTy &Bld = CGF.Builder;
6159 int32_t MinNT = -1, MaxNT = -1;
6160 const Expr *NumTeams =
6161 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6162 if (NumTeams != nullptr) {
6163 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6164
6165 switch (DirectiveKind) {
6166 case OMPD_target: {
6167 const auto *CS = D.getInnermostCapturedStmt();
6168 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6169 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6170 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6171 /*IgnoreResultAssign*/ true);
6172 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6173 /*isSigned=*/true);
6174 }
6175 case OMPD_target_teams:
6176 case OMPD_target_teams_distribute:
6177 case OMPD_target_teams_distribute_simd:
6178 case OMPD_target_teams_distribute_parallel_for:
6179 case OMPD_target_teams_distribute_parallel_for_simd: {
6180 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6181 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6182 /*IgnoreResultAssign*/ true);
6183 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6184 /*isSigned=*/true);
6185 }
6186 default:
6187 break;
6188 }
6189 }
6190
6191   assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6192 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6193 }
6194
6195 /// Check for a constant num_threads value (stored in \p UpperBound), or an
6196 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6197 /// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6198 /// corresponding expression evaluation is not performed.
6199 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6200 const Expr **E, int32_t &UpperBound,
6201 bool UpperBoundOnly, llvm::Value **CondVal) {
6202 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6203 CGF.getContext(), CS->getCapturedStmt());
6204 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6205 if (!Dir)
6206 return;
6207
6208 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6209     // Handle the if clause. If it is present, the number of threads is
6210     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6211 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6212 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6213 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6214 const OMPIfClause *IfClause = nullptr;
6215 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6216 if (C->getNameModifier() == OMPD_unknown ||
6217 C->getNameModifier() == OMPD_parallel) {
6218 IfClause = C;
6219 break;
6220 }
6221 }
6222 if (IfClause) {
6223 const Expr *CondExpr = IfClause->getCondition();
6224 bool Result;
6225 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6226 if (!Result) {
6227 UpperBound = 1;
6228 return;
6229 }
6230 } else {
6231 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6232 if (const auto *PreInit =
6233 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6234 for (const auto *I : PreInit->decls()) {
6235 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6236 CGF.EmitVarDecl(cast<VarDecl>(*I));
6237 } else {
6238 CodeGenFunction::AutoVarEmission Emission =
6239 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6240 CGF.EmitAutoVarCleanups(Emission);
6241 }
6242 }
6243 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6244 }
6245 }
6246 }
6247 }
6248     // Check the value of the num_threads clause iff the if clause was not
6249     // specified or does not evaluate to false.
6250 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6251 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6252 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6253 const auto *NumThreadsClause =
6254 Dir->getSingleClause<OMPNumThreadsClause>();
6255 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6256 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6257 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6258 UpperBound =
6259 UpperBound
6260 ? Constant->getZExtValue()
6261 : std::min(UpperBound,
6262 static_cast<int32_t>(Constant->getZExtValue()));
6263       // If we haven't found an upper bound, remember we saw a thread limiting
6264 // clause.
6265 if (UpperBound == -1)
6266 UpperBound = 0;
6267 if (!E)
6268 return;
6269 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6270 if (const auto *PreInit =
6271 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6272 for (const auto *I : PreInit->decls()) {
6273 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6274 CGF.EmitVarDecl(cast<VarDecl>(*I));
6275 } else {
6276 CodeGenFunction::AutoVarEmission Emission =
6277 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6278 CGF.EmitAutoVarCleanups(Emission);
6279 }
6280 }
6281 }
6282 *E = NTExpr;
6283 }
6284 return;
6285 }
6286 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6287 UpperBound = 1;
6288 }
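
// Sketch of the outcome (names illustrative): for a captured
// '#pragma omp parallel if(c) num_threads(4)' with a non-constant 'c',
// UpperBound becomes 4, *E points at the num_threads expression, and
// *CondVal receives the emitted value of 'c'; if 'c' instead folds to
// false, UpperBound is simply set to 1.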
6289
6290 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6291 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6292 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6293 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6294 "Clauses associated with the teams directive expected to be emitted "
6295 "only for the host!");
6296 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6297 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6298 "Expected target-based executable directive.");
6299
6300 const Expr *NT = nullptr;
6301 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6302
6303 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6304 if (E->isIntegerConstantExpr(CGF.getContext())) {
6305 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6306 UpperBound = UpperBound ? Constant->getZExtValue()
6307 : std::min(UpperBound,
6308 int32_t(Constant->getZExtValue()));
6309 }
6310     // If we haven't found an upper bound, remember we saw a thread limiting
6311 // clause.
6312 if (UpperBound == -1)
6313 UpperBound = 0;
6314 if (EPtr)
6315 *EPtr = E;
6316 };
6317
6318 auto ReturnSequential = [&]() {
6319 UpperBound = 1;
6320 return NT;
6321 };
6322
6323 switch (DirectiveKind) {
6324 case OMPD_target: {
6325 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6326 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6327 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6328 CGF.getContext(), CS->getCapturedStmt());
6329     // TODO: The standard is not clear on how to resolve two thread limit
6330     // clauses, so pick the teams one if it's present, otherwise the target one.
6331 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6332 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6333 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6334 ThreadLimitClause = TLC;
6335 if (ThreadLimitExpr) {
6336 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6337 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6338 CodeGenFunction::LexicalScope Scope(
6339 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6340 if (const auto *PreInit =
6341 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6342 for (const auto *I : PreInit->decls()) {
6343 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6344 CGF.EmitVarDecl(cast<VarDecl>(*I));
6345 } else {
6346 CodeGenFunction::AutoVarEmission Emission =
6347 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6348 CGF.EmitAutoVarCleanups(Emission);
6349 }
6350 }
6351 }
6352 }
6353 }
6354 }
6355 if (ThreadLimitClause)
6356 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6357 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6358 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6359 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6360 CS = Dir->getInnermostCapturedStmt();
6361 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6362 CGF.getContext(), CS->getCapturedStmt());
6363 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6364 }
6365 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6366 CS = Dir->getInnermostCapturedStmt();
6367 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6368 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6369 return ReturnSequential();
6370 }
6371 return NT;
6372 }
6373 case OMPD_target_teams: {
6374 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6375 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6376 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6377 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6378 }
6379 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6380 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6381 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6382 CGF.getContext(), CS->getCapturedStmt());
6383 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6384 if (Dir->getDirectiveKind() == OMPD_distribute) {
6385 CS = Dir->getInnermostCapturedStmt();
6386 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6387 }
6388 }
6389 return NT;
6390 }
6391 case OMPD_target_teams_distribute:
6392 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6393 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6394 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6395 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6396 }
6397 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6398 UpperBoundOnly, CondVal);
6399 return NT;
6400 case OMPD_target_teams_loop:
6401 case OMPD_target_parallel_loop:
6402 case OMPD_target_parallel:
6403 case OMPD_target_parallel_for:
6404 case OMPD_target_parallel_for_simd:
6405 case OMPD_target_teams_distribute_parallel_for:
6406 case OMPD_target_teams_distribute_parallel_for_simd: {
6407 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6408 const OMPIfClause *IfClause = nullptr;
6409 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6410 if (C->getNameModifier() == OMPD_unknown ||
6411 C->getNameModifier() == OMPD_parallel) {
6412 IfClause = C;
6413 break;
6414 }
6415 }
6416 if (IfClause) {
6417 const Expr *Cond = IfClause->getCondition();
6418 bool Result;
6419 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6420 if (!Result)
6421 return ReturnSequential();
6422 } else {
6423 CodeGenFunction::RunCleanupsScope Scope(CGF);
6424 *CondVal = CGF.EvaluateExprAsBool(Cond);
6425 }
6426 }
6427 }
6428 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6429 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6430 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6431 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6432 }
6433 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6434 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6435 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6436 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6437 return NumThreadsClause->getNumThreads();
6438 }
6439 return NT;
6440 }
6441 case OMPD_target_teams_distribute_simd:
6442 case OMPD_target_simd:
6443 return ReturnSequential();
6444 default:
6445 break;
6446 }
6447 llvm_unreachable("Unsupported directive kind.");
6448 }
6449
6450 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6451 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6452 llvm::Value *NumThreadsVal = nullptr;
6453 llvm::Value *CondVal = nullptr;
6454 llvm::Value *ThreadLimitVal = nullptr;
6455 const Expr *ThreadLimitExpr = nullptr;
6456 int32_t UpperBound = -1;
6457
6458 const Expr *NT = getNumThreadsExprForTargetDirective(
6459 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6460 &ThreadLimitExpr);
6461
6462 // Thread limit expressions are used below; emit them.
6463 if (ThreadLimitExpr) {
6464 ThreadLimitVal =
6465 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6466 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6467 /*isSigned=*/false);
6468 }
6469
6470 // Generate the num threads expression.
6471 if (UpperBound == 1) {
6472 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6473 } else if (NT) {
6474 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6475 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6476 /*isSigned=*/false);
6477 } else if (ThreadLimitVal) {
6478 // If we do not have a num threads value but a thread limit, replace the
6479 // former with the latter. We already handled the thread limit expression.
6480 NumThreadsVal = ThreadLimitVal;
6481 ThreadLimitVal = nullptr;
6482 } else {
6483 // Default to "0" which means runtime choice.
6484 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6485 NumThreadsVal = CGF.Builder.getInt32(0);
6486 }
6487
6488 // Handle the if clause. If it is present, the number of threads is
6489 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6490 if (CondVal) {
6491 CodeGenFunction::RunCleanupsScope Scope(CGF);
6492 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6493 CGF.Builder.getInt32(1));
6494 }
6495
6496 // If both the thread limit and the num threads expression were present,
6497 // take the minimum.
6498 if (ThreadLimitVal) {
6499 NumThreadsVal = CGF.Builder.CreateSelect(
6500 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6501 ThreadLimitVal, NumThreadsVal);
6502 }
6503
6504 return NumThreadsVal;
6505 }
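// A minimal sketch of the combined lowering, assuming all three clauses are
// present and none folds to a constant:
//   #pragma omp target parallel if(C) num_threads(N) thread_limit(T)
// conceptually becomes
//   nt = C ? (N ? N : 0) : 1; // 0 lets the runtime choose
//   nt = (T < nt) ? T : nt;   // unsigned minimum with the thread limit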
6506
6507 namespace {
6508 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6509
6510 // Utility to handle information from clauses associated with a given
6511 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6512 // It provides a convenient interface to obtain the information and generate
6513 // code for that information.
6514 class MappableExprsHandler {
6515 public:
6516 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6517 static unsigned getFlagMemberOffset() {
6518 unsigned Offset = 0;
6519 for (uint64_t Remain =
6520 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6521 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6522 !(Remain & 1); Remain = Remain >> 1)
6523 Offset++;
6524 return Offset;
6525 }
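// For example, assuming the MEMBER_OF bits occupy the high 16 bits of the
// 64-bit flag word (OMP_MAP_MEMBER_OF == 0xffff000000000000), the loop above
// counts 48 trailing zero bits, so a member index n would be encoded roughly
// as:
//   Flags |= static_cast<OpenMPOffloadMappingFlags>(
//       static_cast<uint64_t>(n) << getFlagMemberOffset());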
6526
6527 /// Class that holds debugging information for a data mapping to be passed to
6528 /// the runtime library.
6529 class MappingExprInfo {
6530 /// The variable declaration used for the data mapping.
6531 const ValueDecl *MapDecl = nullptr;
6532 /// The original expression used in the map clause, or null if there is
6533 /// none.
6534 const Expr *MapExpr = nullptr;
6535
6536 public:
6537 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6538 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6539
6540 const ValueDecl *getMapDecl() const { return MapDecl; }
6541 const Expr *getMapExpr() const { return MapExpr; }
6542 };
6543
6544 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6545 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6546 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6547 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6548 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6549 using MapNonContiguousArrayTy =
6550 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6551 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6552 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6553
6554 /// This structure contains combined information generated for mappable
6555 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6556 /// mappers, and non-contiguous information.
6557 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6558 MapExprsArrayTy Exprs;
6559 MapValueDeclsArrayTy Mappers;
6560 MapValueDeclsArrayTy DevicePtrDecls;
6561
6562 /// Append arrays in \a CurInfo.
6563 void append(MapCombinedInfoTy &CurInfo) {
6564 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6565 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6566 CurInfo.DevicePtrDecls.end());
6567 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6568 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6569 }
6570 };
6571
6572 /// Map between a struct and its lowest & highest elements which have been
6573 /// mapped.
6574 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6575 /// HE(FieldIndex, Pointer)}
6576 struct StructRangeInfoTy {
6577 MapCombinedInfoTy PreliminaryMapData;
6578 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6579 0, Address::invalid()};
6580 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6581 0, Address::invalid()};
6582 Address Base = Address::invalid();
6583 Address LB = Address::invalid();
6584 bool IsArraySection = false;
6585 bool HasCompleteRecord = false;
6586 };
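// Illustrative example (assumed layout):
//   struct S { int a; double b[8]; int z; } s;
//   #pragma omp target map(s.a, s.z)
// LowestElem would record {0, &s.a} and HighestElem {2, &s.z}, so the
// combined entry covers the contiguous range [&s.a, &s.z + 1) even though
// s.b itself is never mapped.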
6587
6588 private:
6589 /// Kind that defines how a device pointer has to be returned.
6590 struct MapInfo {
6591 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6592 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6593 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6594 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6595 bool ReturnDevicePointer = false;
6596 bool IsImplicit = false;
6597 const ValueDecl *Mapper = nullptr;
6598 const Expr *VarRef = nullptr;
6599 bool ForDeviceAddr = false;
6600
6601 MapInfo() = default;
6602 MapInfo(
6603 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6604 OpenMPMapClauseKind MapType,
6605 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6606 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6607 bool ReturnDevicePointer, bool IsImplicit,
6608 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6609 bool ForDeviceAddr = false)
6610 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6611 MotionModifiers(MotionModifiers),
6612 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6613 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6614 };
6615
6616 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6617 /// member and there is no map information about it, then emission of that
6618 /// entry is deferred until the whole struct has been processed.
6619 struct DeferredDevicePtrEntryTy {
6620 const Expr *IE = nullptr;
6621 const ValueDecl *VD = nullptr;
6622 bool ForDeviceAddr = false;
6623
6624 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6625 bool ForDeviceAddr)
6626 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6627 };
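// E.g., for a hypothetical
//   #pragma omp target data use_device_addr(s.p)
// with no explicit map of s.p, an entry {IE = s.p, VD = p, ForDeviceAddr =
// true} is recorded here and only emitted once the whole struct s has been
// processed.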
6628
6629 /// The target directive from where the mappable clauses were extracted. It
6630 /// is either an executable directive or a user-defined mapper directive.
6631 llvm::PointerUnion<const OMPExecutableDirective *,
6632 const OMPDeclareMapperDecl *>
6633 CurDir;
6634
6635 /// Function the directive is being generated for.
6636 CodeGenFunction &CGF;
6637
6638 /// Set of all first private variables in the current directive.
6639 /// bool data is set to true if the variable is implicitly marked as
6640 /// firstprivate, false otherwise.
6641 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6642
6643 /// Map between device pointer declarations and their expression components.
6644 /// The key value for declarations in 'this' is null.
6645 llvm::DenseMap<
6646 const ValueDecl *,
6647 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6648 DevPointersMap;
6649
6650 /// Map between device addr declarations and their expression components.
6651 /// The key value for declarations in 'this' is null.
6652 llvm::DenseMap<
6653 const ValueDecl *,
6654 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6655 HasDevAddrsMap;
6656
6657 /// Map between lambda declarations and their map type.
6658 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6659
6660 llvm::Value *getExprTypeSize(const Expr *E) const {
6661 QualType ExprTy = E->getType().getCanonicalType();
6662
6663 // Calculate the size for array shaping expression.
6664 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6665 llvm::Value *Size =
6666 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6667 for (const Expr *SE : OAE->getDimensions()) {
6668 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6669 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6670 CGF.getContext().getSizeType(),
6671 SE->getExprLoc());
6672 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6673 }
6674 return Size;
6675 }
6676
6677 // Reference types are ignored for mapping purposes.
6678 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6679 ExprTy = RefTy->getPointeeType().getCanonicalType();
6680
6681 // Given that an array section is considered a built-in type, we need to
6682 // do the calculation based on the length of the section instead of relying
6683 // on CGF.getTypeSize(E->getType()).
6684 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6685 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6686 OAE->getBase()->IgnoreParenImpCasts())
6687 .getCanonicalType();
6688
6689 // If there is no length associated with the expression and the lower
6690 // bound is not specified either, that means we are using the whole
6691 // length of the base.
6692 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6693 !OAE->getLowerBound())
6694 return CGF.getTypeSize(BaseTy);
6695
6696 llvm::Value *ElemSize;
6697 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6698 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6699 } else {
6700 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6701 assert(ATy && "Expecting array type if not a pointer type.");
6702 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6703 }
6704
6705 // If we don't have a length at this point, that is because we have an
6706 // array section with a single element.
6707 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6708 return ElemSize;
6709
6710 if (const Expr *LenExpr = OAE->getLength()) {
6711 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6712 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6713 CGF.getContext().getSizeType(),
6714 LenExpr->getExprLoc());
6715 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6716 }
6717 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6718 OAE->getLowerBound() && "expected array_section[lb:].");
6719 // Size = sizeof(base array) - lb * sizeof(element), clamped at zero.
6720 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6721 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6722 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6723 CGF.getContext().getSizeType(),
6724 OAE->getLowerBound()->getExprLoc());
6725 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6726 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6727 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6728 LengthVal = CGF.Builder.CreateSelect(
6729 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6730 return LengthVal;
6731 }
6732 return CGF.getTypeSize(ExprTy);
6733 }
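// A few illustrative results, assuming "double a[10]; double *p;":
//   getExprTypeSize(a)      -> 10 * sizeof(double)
//   getExprTypeSize(a[2:4]) -> 4 * sizeof(double)
//   getExprTypeSize(a[2:])  -> sizeof(a) - 2 * sizeof(double), clamped at 0
//   getExprTypeSize(p[0:n]) -> n * sizeof(double)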
6734
6735 /// Return the corresponding bits for a given map clause modifier. Add
6736 /// a flag marking the map as a pointer if requested. Add a flag marking the
6737 /// map as the first one of a series of maps that relate to the same map
6738 /// expression.
6739 OpenMPOffloadMappingFlags getMapTypeBits(
6740 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6741 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6742 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6743 OpenMPOffloadMappingFlags Bits =
6744 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6745 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6746 switch (MapType) {
6747 case OMPC_MAP_alloc:
6748 case OMPC_MAP_release:
6749 // alloc and release are the default behavior in the runtime library, i.e.
6750 // if we don't pass any bits, alloc/release is what the runtime is going
6751 // to do. Therefore, we don't need to signal anything for these two type
6752 // modifiers.
6753 break;
6754 case OMPC_MAP_to:
6755 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6756 break;
6757 case OMPC_MAP_from:
6758 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6759 break;
6760 case OMPC_MAP_tofrom:
6761 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6762 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6763 break;
6764 case OMPC_MAP_delete:
6765 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6766 break;
6767 case OMPC_MAP_unknown:
6768 llvm_unreachable("Unexpected map type!");
6769 }
6770 if (AddPtrFlag)
6771 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6772 if (AddIsTargetParamFlag)
6773 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6774 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6775 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6776 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6777 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6778 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6779 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6780 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6781 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6782 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6783 if (IsNonContiguous)
6784 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6785 return Bits;
6786 }
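// For example, an explicit
//   #pragma omp target map(always, close, tofrom: x)
// yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with
// OMP_MAP_PTR_AND_OBJ and/or OMP_MAP_TARGET_PARAM added depending on the
// AddPtrFlag and AddIsTargetParamFlag arguments.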
6787
6788 /// Return true if the provided expression is a final array section. A
6789 /// final array section is one whose length can't be proved to be one.
6790 bool isFinalArraySectionExpression(const Expr *E) const {
6791 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6792
6793 // It is not an array section and therefore not a unity-size one.
6794 if (!OASE)
6795 return false;
6796
6797 // An array section with no colon always refers to a single element.
6798 if (OASE->getColonLocFirst().isInvalid())
6799 return false;
6800
6801 const Expr *Length = OASE->getLength();
6802
6803 // If we don't have a length we have to check if the array has size 1
6804 // for this dimension. Also, we should always expect a length if the
6805 // base type is pointer.
6806 if (!Length) {
6807 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6808 OASE->getBase()->IgnoreParenImpCasts())
6809 .getCanonicalType();
6810 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6811 return ATy->getSExtSize() != 1;
6812 // If we don't have a constant dimension length, we have to consider
6813 // the current section as having any size, so it is not necessarily
6814 // unitary. If it happens to be unity size, that's the user's fault.
6815 return true;
6816 }
6817
6818 // Check if the length evaluates to 1.
6819 Expr::EvalResult Result;
6820 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6821 return true; // Can have more than size 1.
6822
6823 llvm::APSInt ConstLength = Result.Val.getInt();
6824 return ConstLength.getSExtValue() != 1;
6825 }
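// Illustrative behavior, assuming "int a[10]; int *p;":
//   a[3]   -> false (array subscript, not an array section)
//   a[2:1] -> false (length provably one)
//   a[2:]  -> true  (covers a[2..9])
//   p[0:n] -> true  (length cannot be proved to be one)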
6826
6827 /// Generate the base pointers, section pointers, sizes, map type bits, and
6828 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6829 /// map type, map or motion modifiers, and expression components.
6830 /// \a IsFirstComponent should be set to true if the provided set of
6831 /// components is the first associated with a capture.
6832 void generateInfoForComponentList(
6833 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6834 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6835 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6836 MapCombinedInfoTy &CombinedInfo,
6837 MapCombinedInfoTy &StructBaseCombinedInfo,
6838 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6839 bool IsImplicit, bool GenerateAllInfoForClauses,
6840 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6841 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6842 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6843 OverlappedElements = std::nullopt,
6844 bool AreBothBasePtrAndPteeMapped = false) const {
6845 // The following summarizes what has to be generated for each map and the
6846 // types below. The generated information is expressed in this order:
6847 // base pointer, section pointer, size, flags
6848 // (to add to the ones that come from the map type and modifier).
6849 //
6850 // double d;
6851 // int i[100];
6852 // float *p;
6853 // int **a = &i;
6854 //
6855 // struct S1 {
6856 // int i;
6857 // float f[50];
6858 // }
6859 // struct S2 {
6860 // int i;
6861 // float f[50];
6862 // S1 s;
6863 // double *p;
6864 // struct S2 *ps;
6865 // int &ref;
6866 // }
6867 // S2 s;
6868 // S2 *ps;
6869 //
6870 // map(d)
6871 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6872 //
6873 // map(i)
6874 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6875 //
6876 // map(i[1:23])
6877 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6878 //
6879 // map(p)
6880 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6881 //
6882 // map(p[1:24])
6883 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6884 // in unified shared memory mode or for local pointers
6885 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6886 //
6887 // map((*a)[0:3])
6888 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6889 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6890 //
6891 // map(**a)
6892 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6893 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6894 //
6895 // map(s)
6896 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6897 //
6898 // map(s.i)
6899 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6900 //
6901 // map(s.s.f)
6902 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6903 //
6904 // map(s.p)
6905 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6906 //
6907 // map(to: s.p[:22])
6908 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6909 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6910 // &(s.p), &(s.p[0]), 22*sizeof(double),
6911 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6912 // (*) alloc space for struct members, only this is a target parameter
6913 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6914 // optimizes this entry out, same in the examples below)
6915 // (***) map the pointee (map: to)
6916 //
6917 // map(to: s.ref)
6918 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6919 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6920 // (*) alloc space for struct members, only this is a target parameter
6921 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6922 // optimizes this entry out, same in the examples below)
6923 // (***) map the pointee (map: to)
6924 //
6925 // map(s.ps)
6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6927 //
6928 // map(from: s.ps->s.i)
6929 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6932 //
6933 // map(to: s.ps->ps)
6934 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6935 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6936 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6937 //
6938 // map(s.ps->ps->ps)
6939 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6940 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6941 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6942 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6943 //
6944 // map(to: s.ps->ps->s.f[:22])
6945 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6946 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6947 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6948 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6949 //
6950 // map(ps)
6951 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6952 //
6953 // map(ps->i)
6954 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6955 //
6956 // map(ps->s.f)
6957 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6958 //
6959 // map(from: ps->p)
6960 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6961 //
6962 // map(to: ps->p[:22])
6963 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6964 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6965 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6966 //
6967 // map(ps->ps)
6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6969 //
6970 // map(from: ps->ps->s.i)
6971 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6974 //
6975 // map(from: ps->ps->ps)
6976 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6977 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6978 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6979 //
6980 // map(ps->ps->ps->ps)
6981 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6982 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6983 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6984 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6985 //
6986 // map(to: ps->ps->ps->s.f[:22])
6987 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6988 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6989 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6990 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6991 //
6992 // map(to: s.f[:22]) map(from: s.p[:33])
6993 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6994 // sizeof(double*) (*), TARGET_PARAM
6995 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6996 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6997 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6998 // (*) allocate contiguous space needed to fit all mapped members even if
6999 // we allocate space for members not mapped (in this example,
7000 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7001 // them as well because they fall between &s.f[0] and &s.p)
7002 //
7003 // map(from: s.f[:22]) map(to: ps->p[:33])
7004 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7005 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7006 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7007 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7008 // (*) the struct this entry pertains to is the 2nd element in the list of
7009 // arguments, hence MEMBER_OF(2)
7010 //
7011 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7012 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7013 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7014 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7015 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7016 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7017 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7018 // (*) the struct this entry pertains to is the 4th element in the list
7019 // of arguments, hence MEMBER_OF(4)
7020 //
7021 // map(p, p[:100])
7022 // ===> map(p[:100])
7023 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7024
7025 // Track if the map information being generated is the first for a capture.
7026 bool IsCaptureFirstInfo = IsFirstComponentList;
7027 // When the variable is on a declare target link or in a to clause with
7028 // unified memory, a reference is needed to hold the host/device address
7029 // of the variable.
7030 bool RequiresReference = false;
7031
7032 // Scan the components from the base to the complete expression.
7033 auto CI = Components.rbegin();
7034 auto CE = Components.rend();
7035 auto I = CI;
7036
7037 // Track if the map information being generated is the first for a list of
7038 // components.
7039 bool IsExpressionFirstInfo = true;
7040 bool FirstPointerInComplexData = false;
7041 Address BP = Address::invalid();
7042 const Expr *AssocExpr = I->getAssociatedExpression();
7043 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7044 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7045 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7046
7047 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7048 return;
7049 if (isa<MemberExpr>(AssocExpr)) {
7050 // The base is the 'this' pointer. The content of the pointer is going
7051 // to be the base of the field being mapped.
7052 BP = CGF.LoadCXXThisAddress();
7053 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7054 (OASE &&
7055 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7056 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7057 } else if (OAShE &&
7058 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7059 BP = Address(
7060 CGF.EmitScalarExpr(OAShE->getBase()),
7061 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7062 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7063 } else {
7064 // The base is the reference to the variable.
7065 // BP = &Var.
7066 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7067 if (const auto *VD =
7068 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7069 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7070 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7071 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7072 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7073 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7074 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7075 RequiresReference = true;
7076 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7077 }
7078 }
7079 }
7080
7081 // If the variable is a pointer and is being dereferenced (i.e. is not
7082 // the last component), the base has to be the pointer itself, not its
7083 // reference. References are ignored for mapping purposes.
7084 QualType Ty =
7085 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7086 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7087 // No need to generate individual map information for the pointer, it
7088 // can be associated with the combined storage if shared memory mode is
7089 // active or the base declaration is not a global variable.
7090 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7091 if (!AreBothBasePtrAndPteeMapped &&
7092 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7093 !VD || VD->hasLocalStorage()))
7094 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7095 else
7096 FirstPointerInComplexData = true;
7097 ++I;
7098 }
7099 }
7100
7101 // Track whether a component of the list should be marked as MEMBER_OF some
7102 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7103 // in a component list should be marked as MEMBER_OF; all subsequent entries
7104 // do not belong to the base struct. E.g.
7105 // struct S2 s;
7106 // s.ps->ps->ps->f[:]
7107 // (1) (2) (3) (4)
7108 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7109 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7110 // is the pointee of ps(2), which is not a member of struct s, so it should not
7111 // be marked as such (it is still PTR_AND_OBJ).
7112 // The variable is initialized to false so that PTR_AND_OBJ entries which
7113 // are not struct members are not considered (e.g. array of pointers to
7114 // data).
7115 bool ShouldBeMemberOf = false;
7116
7117 // Variable keeping track of whether or not we have encountered a component
7118 // in the component list which is a member expression. Useful when we have a
7119 // pointer or a final array section, in which case it is the previous
7120 // component in the list which tells us whether we have a member expression.
7121 // E.g. X.f[:]
7122 // While processing the final array section "[:]" it is "f" which tells us
7123 // whether we are dealing with a member of a declared struct.
7124 const MemberExpr *EncounteredME = nullptr;
7125
7126 // Track the total number of dimensions. Start from one for the dummy
7127 // dimension.
7128 uint64_t DimSize = 1;
7129
7130 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7131 bool IsPrevMemberReference = false;
7132
7133 // We need to check if we will be encountering any member expressions
7134 // (MEs). If we do not encounter any, we will be mapping the whole struct.
7135 // In that case we need to skip adding an entry for the struct to the
7136 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7137 // list only when generating all info for clauses.
7138 bool IsMappingWholeStruct = true;
7139 if (!GenerateAllInfoForClauses) {
7140 IsMappingWholeStruct = false;
7141 } else {
7142 for (auto TempI = I; TempI != CE; ++TempI) {
7143 const MemberExpr *PossibleME =
7144 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7145 if (PossibleME) {
7146 IsMappingWholeStruct = false;
7147 break;
7148 }
7149 }
7150 }
7151
7152 for (; I != CE; ++I) {
7153 // If the current component is member of a struct (parent struct) mark it.
7154 if (!EncounteredME) {
7155 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7156 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7157 // as MEMBER_OF the parent struct.
7158 if (EncounteredME) {
7159 ShouldBeMemberOf = true;
7160 // Do not emit as a complex pointer if this is actually not an
7161 // array-like expression.
7162 if (FirstPointerInComplexData) {
7163 QualType Ty = std::prev(I)
7164 ->getAssociatedDeclaration()
7165 ->getType()
7166 .getNonReferenceType();
7167 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7168 FirstPointerInComplexData = false;
7169 }
7170 }
7171 }
7172
7173 auto Next = std::next(I);
7174
7175 // We need to generate the addresses and sizes if this is the last
7176 // component, if the component is a pointer or if it is an array section
7177 // whose length can't be proved to be one. If this is a pointer, it
7178 // becomes the base address for the following components.
7179
7180 // A final array section is one whose length can't be proved to be one.
7181 // If the map item is non-contiguous then we don't treat any array section
7182 // as final array section.
7183 bool IsFinalArraySection =
7184 !IsNonContiguous &&
7185 isFinalArraySectionExpression(I->getAssociatedExpression());
7186
7187 // If we have a declaration for the mapping use that, otherwise use
7188 // the base declaration of the map clause.
7189 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7190 ? I->getAssociatedDeclaration()
7191 : BaseDecl;
7192 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7193 : MapExpr;
7194
7195 // Get information on whether the element is a pointer. Have to do a
7196 // special treatment for array sections given that they are built-in
7197 // types.
7198 const auto *OASE =
7199 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7200 const auto *OAShE =
7201 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7202 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7203 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7204 bool IsPointer =
7205 OAShE ||
7206 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7207 .getCanonicalType()
7208 ->isAnyPointerType()) ||
7209 I->getAssociatedExpression()->getType()->isAnyPointerType();
7210 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7211 MapDecl &&
7212 MapDecl->getType()->isLValueReferenceType();
7213 bool IsNonDerefPointer = IsPointer &&
7214 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7215 !IsNonContiguous;
7216
7217 if (OASE)
7218 ++DimSize;
7219
7220 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7221 IsFinalArraySection) {
7222 // If this is not the last component, we expect the pointer to be
7223 // associated with an array expression or member expression.
7224 assert((Next == CE ||
7225 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7226 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7227 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7228 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7229 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7230 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7231 "Unexpected expression");
7232
7233 Address LB = Address::invalid();
7234 Address LowestElem = Address::invalid();
7235 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7236 const MemberExpr *E) {
7237 const Expr *BaseExpr = E->getBase();
7238 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7239 // scalar.
7240 LValue BaseLV;
7241 if (E->isArrow()) {
7242 LValueBaseInfo BaseInfo;
7243 TBAAAccessInfo TBAAInfo;
7244 Address Addr =
7245 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7246 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7247 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7248 } else {
7249 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7250 }
7251 return BaseLV;
7252 };
7253 if (OAShE) {
7254 LowestElem = LB =
7255 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7256 CGF.ConvertTypeForMem(
7257 OAShE->getBase()->getType()->getPointeeType()),
7258 CGF.getContext().getTypeAlignInChars(
7259 OAShE->getBase()->getType()));
7260 } else if (IsMemberReference) {
7261 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7262 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7263 LowestElem = CGF.EmitLValueForFieldInitialization(
7264 BaseLVal, cast<FieldDecl>(MapDecl))
7265 .getAddress();
7266 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7267 .getAddress();
7268 } else {
7269 LowestElem = LB =
7270 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7271 .getAddress();
7272 }
7273
7274 // If this component is a pointer inside the base struct then we don't
7275 // need to create any entry for it - it will be combined with the object
7276 // it is pointing to into a single PTR_AND_OBJ entry.
7277 bool IsMemberPointerOrAddr =
7278 EncounteredME &&
7279 (((IsPointer || ForDeviceAddr) &&
7280 I->getAssociatedExpression() == EncounteredME) ||
7281 (IsPrevMemberReference && !IsPointer) ||
7282 (IsMemberReference && Next != CE &&
7283 !Next->getAssociatedExpression()->getType()->isPointerType()));
7284 if (!OverlappedElements.empty() && Next == CE) {
7285 // Handle base element with the info for overlapped elements.
7286 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7287 assert(!IsPointer &&
7288 "Unexpected base element with the pointer type.");
7289 // Mark the whole struct as the struct that requires allocation on the
7290 // device.
7291 PartialStruct.LowestElem = {0, LowestElem};
7292 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7293 I->getAssociatedExpression()->getType());
7294 Address HB = CGF.Builder.CreateConstGEP(
7295 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7296 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7297 TypeSize.getQuantity() - 1);
7298 PartialStruct.HighestElem = {
7299 std::numeric_limits<decltype(
7300 PartialStruct.HighestElem.first)>::max(),
7301 HB};
7302 PartialStruct.Base = BP;
7303 PartialStruct.LB = LB;
7304 assert(
7305 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7306 "Overlapped elements must be used only once for the variable.");
7307 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7308 // Emit data for non-overlapped data.
7309 OpenMPOffloadMappingFlags Flags =
7310 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7311 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7312 /*AddPtrFlag=*/false,
7313 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7314 llvm::Value *Size = nullptr;
7315 // Do bitcopy of all non-overlapped structure elements.
7316 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7317 Component : OverlappedElements) {
7318 Address ComponentLB = Address::invalid();
7319 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7320 Component) {
7321 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7322 const auto *FD = dyn_cast<FieldDecl>(VD);
7323 if (FD && FD->getType()->isLValueReferenceType()) {
7324 const auto *ME =
7325 cast<MemberExpr>(MC.getAssociatedExpression());
7326 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7327 ComponentLB =
7328 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7329 .getAddress();
7330 } else {
7331 ComponentLB =
7332 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7333 .getAddress();
7334 }
7335 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7336 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7337 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7338 LBPtr);
7339 break;
7340 }
7341 }
7342 assert(Size && "Failed to determine structure size");
7343 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7344 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7345 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7346 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7347 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7348 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7349 Size, CGF.Int64Ty, /*isSigned=*/true));
7350 CombinedInfo.Types.push_back(Flags);
7351 CombinedInfo.Mappers.push_back(nullptr);
7352 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7353 : 1);
7354 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7355 }
7356 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7357 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7358 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7359 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7360 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7361 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7362 Size = CGF.Builder.CreatePtrDiff(
7363 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7364 LBPtr);
7365 CombinedInfo.Sizes.push_back(
7366 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7367 CombinedInfo.Types.push_back(Flags);
7368 CombinedInfo.Mappers.push_back(nullptr);
7369 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7370 : 1);
7371 break;
7372 }
7373 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7374 // Skip adding an entry in the CurInfo of this combined entry if the
7375 // whole struct is currently being mapped. The struct needs to be added
7376 // in the first position before any data internal to the struct is being
7377 // mapped.
7378 if (!IsMemberPointerOrAddr ||
7379 (Next == CE && MapType != OMPC_MAP_unknown)) {
7380 if (!IsMappingWholeStruct) {
7381 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7382 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7383 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7384 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7385 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7386 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7387 Size, CGF.Int64Ty, /*isSigned=*/true));
7388 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7389 : 1);
7390 } else {
7391 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7392 StructBaseCombinedInfo.BasePointers.push_back(
7393 BP.emitRawPointer(CGF));
7394 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7395 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7396 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7397 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7398 Size, CGF.Int64Ty, /*isSigned=*/true));
7399 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7400 IsNonContiguous ? DimSize : 1);
7401 }
7402
7403 // If Mapper is valid, the last component inherits the mapper.
7404 bool HasMapper = Mapper && Next == CE;
7405 if (!IsMappingWholeStruct)
7406 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7407 else
7408 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7409 : nullptr);
7410
7411 // We need to add a pointer flag for each map that comes from the
7412 // same expression except for the first one. We also need to signal
7413 // this map is the first one that relates to the current capture
7414 // (there is a set of entries for each capture).
7415 OpenMPOffloadMappingFlags Flags =
7416 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7417 !IsExpressionFirstInfo || RequiresReference ||
7418 FirstPointerInComplexData || IsMemberReference,
7419 AreBothBasePtrAndPteeMapped ||
7420 (IsCaptureFirstInfo && !RequiresReference),
7421 IsNonContiguous);
7422
7423 if (!IsExpressionFirstInfo || IsMemberReference) {
7424 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7425 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7426 if (IsPointer || (IsMemberReference && Next != CE))
7427 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7428 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7429 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7430 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7431 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7432
7433 if (ShouldBeMemberOf) {
7434 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7435 // should be later updated with the correct value of MEMBER_OF.
7436 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7437 // From now on, all subsequent PTR_AND_OBJ entries should not be
7438 // marked as MEMBER_OF.
7439 ShouldBeMemberOf = false;
7440 }
7441 }
7442
7443 if (!IsMappingWholeStruct)
7444 CombinedInfo.Types.push_back(Flags);
7445 else
7446 StructBaseCombinedInfo.Types.push_back(Flags);
7447 }
7448
7449 // If we have encountered a member expression so far, keep track of the
7450 // mapped member. If the parent is "*this", then the value declaration
7451 // is nullptr.
7452 if (EncounteredME) {
7453 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7454 unsigned FieldIndex = FD->getFieldIndex();
7455
7456 // Update info about the lowest and highest elements for this struct
7457 if (!PartialStruct.Base.isValid()) {
7458 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7459 if (IsFinalArraySection) {
7460 Address HB =
7461 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7462 .getAddress();
7463 PartialStruct.HighestElem = {FieldIndex, HB};
7464 } else {
7465 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7466 }
7467 PartialStruct.Base = BP;
7468 PartialStruct.LB = BP;
7469 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7470 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7471 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7472 if (IsFinalArraySection) {
7473 Address HB =
7474 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7475 .getAddress();
7476 PartialStruct.HighestElem = {FieldIndex, HB};
7477 } else {
7478 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7479 }
7480 }
7481 }
7482
7483 // Need to emit combined struct for array sections.
7484 if (IsFinalArraySection || IsNonContiguous)
7485 PartialStruct.IsArraySection = true;
7486
7487 // If we have a final array section, we are done with this expression.
7488 if (IsFinalArraySection)
7489 break;
7490
7491 // The pointer becomes the base for the next element.
7492 if (Next != CE)
7493 BP = IsMemberReference ? LowestElem : LB;
7494
7495 IsExpressionFirstInfo = false;
7496 IsCaptureFirstInfo = false;
7497 FirstPointerInComplexData = false;
7498 IsPrevMemberReference = IsMemberReference;
7499 } else if (FirstPointerInComplexData) {
7500 QualType Ty = Components.rbegin()
7501 ->getAssociatedDeclaration()
7502 ->getType()
7503 .getNonReferenceType();
7504 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7505 FirstPointerInComplexData = false;
7506 }
7507 }
7508 // If we ran through the whole component list without encountering a
7509 // member expression, allocate space for the whole record.
7510 if (!EncounteredME)
7511 PartialStruct.HasCompleteRecord = true;
7512
7513 if (!IsNonContiguous)
7514 return;
7515
7516 const ASTContext &Context = CGF.getContext();
7517
7518 // To support strides in array sections, we need to initialize the first
7519 // dimension size as 1, the first offset as 0, and the first count as 1.
7520 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7521 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7522 MapValuesArrayTy CurStrides;
7523 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7524 uint64_t ElementTypeSize;
7525
7526 // Collect size information for each dimension and get the element size as
7527 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7528 // should be [10, 10] and the first stride is 4 bytes.
7529 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7530 Components) {
7531 const Expr *AssocExpr = Component.getAssociatedExpression();
7532 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7533
7534 if (!OASE)
7535 continue;
7536
7537 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7538 auto *CAT = Context.getAsConstantArrayType(Ty);
7539 auto *VAT = Context.getAsVariableArrayType(Ty);
7540
7541 // We need all the dimension sizes except for the last dimension.
7542 assert((VAT || CAT || &Component == &*Components.begin()) &&
7543 "Should be either ConstantArray or VariableArray if not the "
7544 "first Component");
7545
7546 // Get element size if CurStrides is empty.
7547 if (CurStrides.empty()) {
7548 const Type *ElementType = nullptr;
7549 if (CAT)
7550 ElementType = CAT->getElementType().getTypePtr();
7551 else if (VAT)
7552 ElementType = VAT->getElementType().getTypePtr();
7553 else
7554 assert(&Component == &*Components.begin() &&
7555 "Only expect pointer (non CAT or VAT) when this is the "
7556 "first Component");
7557 // If ElementType is null, then it means the base is a pointer
7558 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7559 // for the next iteration.
7560 if (ElementType) {
7561 // For the case of having a pointer as the base, we need to remove one
7562 // level of indirection.
7563 if (&Component != &*Components.begin())
7564 ElementType = ElementType->getPointeeOrArrayElementType();
7565 ElementTypeSize =
7566 Context.getTypeSizeInChars(ElementType).getQuantity();
7567 CurStrides.push_back(
7568 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7569 }
7570 }
7571 // Get dimension value except for the last dimension since we don't need
7572 // it.
7573 if (DimSizes.size() < Components.size() - 1) {
7574 if (CAT)
7575 DimSizes.push_back(
7576 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7577 else if (VAT)
7578 DimSizes.push_back(CGF.Builder.CreateIntCast(
7579 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7580 /*IsSigned=*/false));
7581 }
7582 }
7583
7584 // Skip the dummy dimension since we already have its information.
7585 auto *DI = DimSizes.begin() + 1;
7586 // Running product of dimension sizes.
7587 llvm::Value *DimProd =
7588 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7589
7590 // Collect info for non-contiguous maps. Notice that offset, count, and
7591 // stride are only meaningful for an array section, so we insert a null
7592 // for anything other than an array section.
7593 // Also, the sizes of the offset, count, and stride arrays are not the
7594 // same as those of pointers, base_pointers, sizes, or dims. Instead, they
7595 // equal the number of non-contiguous declarations in the target update
7596 // to/from clause.
7597 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7598 Components) {
7599 const Expr *AssocExpr = Component.getAssociatedExpression();
7600
7601 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7602 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7603 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7604 /*isSigned=*/false);
7605 CurOffsets.push_back(Offset);
7606 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7607 CurStrides.push_back(CurStrides.back());
7608 continue;
7609 }
7610
7611 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7612
7613 if (!OASE)
7614 continue;
7615
7616 // Offset
7617 const Expr *OffsetExpr = OASE->getLowerBound();
7618 llvm::Value *Offset = nullptr;
7619 if (!OffsetExpr) {
7620 // If offset is absent, then we just set it to zero.
7621 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7622 } else {
7623 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7624 CGF.Int64Ty,
7625 /*isSigned=*/false);
7626 }
7627 CurOffsets.push_back(Offset);
7628
7629 // Count
7630 const Expr *CountExpr = OASE->getLength();
7631 llvm::Value *Count = nullptr;
7632 if (!CountExpr) {
7633 // In Clang, once a higher dimension is an array section, we construct
7634 // all the lower dimensions as array sections too. However, for a case
7635 // like arr[0:2][2], Clang constructs the inner dimension as an array
7636 // section even though it is not in array-section form according to the spec.
7637 if (!OASE->getColonLocFirst().isValid() &&
7638 !OASE->getColonLocSecond().isValid()) {
7639 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7640 } else {
7641 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7642 // When the length is absent it defaults to ⌈(size −
7643 // lower-bound)/stride⌉, where size is the size of the array
7644 // dimension.
7645 const Expr *StrideExpr = OASE->getStride();
7646 llvm::Value *Stride =
7647 StrideExpr
7648 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7649 CGF.Int64Ty, /*isSigned=*/false)
7650 : nullptr;
7651 if (Stride)
7652 Count = CGF.Builder.CreateUDiv(
7653 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7654 else
7655 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7656 }
7657 } else {
7658 Count = CGF.EmitScalarExpr(CountExpr);
7659 }
7660 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7661 CurCounts.push_back(Count);
7662
7663 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7664 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7665 // Offset Count Stride
7666 // D0 0 1 4 (int) <- dummy dimension
7667 // D1 0 2 8 (2 * (1) * 4)
7668 // D2 1 2 20 (1 * (1 * 5) * 4)
7669 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
7670 const Expr *StrideExpr = OASE->getStride();
7671 llvm::Value *Stride =
7672 StrideExpr
7673 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7674 CGF.Int64Ty, /*isSigned=*/false)
7675 : nullptr;
7676 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7677 if (Stride)
7678 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7679 else
7680 CurStrides.push_back(DimProd);
7681 if (DI != DimSizes.end())
7682 ++DI;
7683 }
7684
7685 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7686 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7687 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7688 }
7689
7690 /// Return the adjusted map modifiers if the declaration a capture refers to
7691 /// appears in a first-private clause. This is expected to be used only with
7692 /// directives that start with 'target'.
7693 OpenMPOffloadMappingFlags
7694 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7695 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7696
7697 // A first private variable captured by reference will use only the
7698 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7699 // declaration is known as first-private in this handler.
7700 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7701 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7702 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7703 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7704 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7705 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7706 }
7707 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7708 if (I != LambdasMap.end())
7709 // For map(to: lambda): use the user-specified map type.
7710 return getMapTypeBits(
7711 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7712 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7713 /*AddPtrFlag=*/false,
7714 /*AddIsTargetParamFlag=*/false,
7715 /*isNonContiguous=*/false);
7716 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7717 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7718 }
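// Sketch of the resulting flags, assuming the captured declarations are
// known to this handler:
//   firstprivate(p), p of type "float *" -> TO | PTR_AND_OBJ
//   firstprivate(x), x of type "int"     -> PRIVATE | TO
//   captured lambda with map(to: ...)    -> the user-specified map bits
//   any other capture                    -> TO | FROM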
7719
7720 void getPlainLayout(const CXXRecordDecl *RD,
7721 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7722 bool AsBase) const {
7723 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7724
7725 llvm::StructType *St =
7726 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7727
7728 unsigned NumElements = St->getNumElements();
7729 llvm::SmallVector<
7730 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7731 RecordLayout(NumElements);
7732
7733 // Fill bases.
7734 for (const auto &I : RD->bases()) {
7735 if (I.isVirtual())
7736 continue;
7737
7738 QualType BaseTy = I.getType();
7739 const auto *Base = BaseTy->getAsCXXRecordDecl();
7740 // Ignore empty bases.
7741 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7742 CGF.getContext()
7743 .getASTRecordLayout(Base)
7744 .getNonVirtualSize()
7745 .isZero())
7746 continue;
7747
7748 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7749 RecordLayout[FieldIndex] = Base;
7750 }
7751 // Fill in virtual bases.
7752 for (const auto &I : RD->vbases()) {
7753 QualType BaseTy = I.getType();
7754 // Ignore empty bases.
7755 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7756 continue;
7757
7758 const auto *Base = BaseTy->getAsCXXRecordDecl();
7759 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7760 if (RecordLayout[FieldIndex])
7761 continue;
7762 RecordLayout[FieldIndex] = Base;
7763 }
7764 // Fill in all the fields.
7765 assert(!RD->isUnion() && "Unexpected union.");
7766 for (const auto *Field : RD->fields()) {
7767 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7768 // will fill in later.)
7769 if (!Field->isBitField() &&
7770 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7771 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7772 RecordLayout[FieldIndex] = Field;
7773 }
7774 }
7775 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7776 &Data : RecordLayout) {
7777 if (Data.isNull())
7778 continue;
7779 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7780 getPlainLayout(Base, Layout, /*AsBase=*/true);
7781 else
7782 Layout.push_back(Data.get<const FieldDecl *>());
7783 }
7784 }
7785
7786 /// Generate all the base pointers, section pointers, sizes, map types, and
7787 /// mappers for the extracted mappable expressions (all included in \a
7788 /// CombinedInfo). Also, for each item that relates to a device pointer, a
7789 /// pair of the relevant declaration and the index where it occurs is appended
7790 /// to the device pointers info array.
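///
/// A simplified sketch of the intent (illustrative, with hypothetical
/// variables): for
/// \code
/// #pragma omp target data map(tofrom: a) use_device_ptr(p)
/// \endcode
/// the map clause produces combined-info entries for 'a', while the
/// use_device_ptr clause either marks an existing map entry for 'p' as
/// returning the device pointer or emits a zero-size RETURN_PARAM entry.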
7791 void generateAllInfoForClauses(
7792 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7793 llvm::OpenMPIRBuilder &OMPBuilder,
7794 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7795 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7796 // We have to process the component lists that relate to the same
7797 // declaration in a single chunk so that we can generate the map flags
7798 // correctly. Therefore, we organize all lists in a map.
7799 enum MapKind { Present, Allocs, Other, Total };
7800 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7801 SmallVector<SmallVector<MapInfo, 8>, 4>>
7802 Info;
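// For example (illustrative): with map(present, to: s.x) map(to: s.y),
// both component lists land in the bucket for 's', the first one under the
// Present kind so that it is processed before the others.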
7803
7804 // Helper function to fill the information map for the different supported
7805 // clauses.
7806 auto &&InfoGen =
7807 [&Info, &SkipVarSet](
7808 const ValueDecl *D, MapKind Kind,
7809 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7810 OpenMPMapClauseKind MapType,
7811 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7812 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7813 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7814 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7815 if (SkipVarSet.contains(D))
7816 return;
7817 auto It = Info.find(D);
7818 if (It == Info.end())
7819 It = Info
7820 .insert(std::make_pair(
7821 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7822 .first;
7823 It->second[Kind].emplace_back(
7824 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7825 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7826 };
7827
7828 for (const auto *Cl : Clauses) {
7829 const auto *C = dyn_cast<OMPMapClause>(Cl);
7830 if (!C)
7831 continue;
7832 MapKind Kind = Other;
7833 if (llvm::is_contained(C->getMapTypeModifiers(),
7834 OMPC_MAP_MODIFIER_present))
7835 Kind = Present;
7836 else if (C->getMapType() == OMPC_MAP_alloc)
7837 Kind = Allocs;
7838 const auto *EI = C->getVarRefs().begin();
7839 for (const auto L : C->component_lists()) {
7840 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7841 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7842 C->getMapTypeModifiers(), std::nullopt,
7843 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7844 E);
7845 ++EI;
7846 }
7847 }
7848 for (const auto *Cl : Clauses) {
7849 const auto *C = dyn_cast<OMPToClause>(Cl);
7850 if (!C)
7851 continue;
7852 MapKind Kind = Other;
7853 if (llvm::is_contained(C->getMotionModifiers(),
7854 OMPC_MOTION_MODIFIER_present))
7855 Kind = Present;
7856 const auto *EI = C->getVarRefs().begin();
7857 for (const auto L : C->component_lists()) {
7858 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7859 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7860 C->isImplicit(), std::get<2>(L), *EI);
7861 ++EI;
7862 }
7863 }
7864 for (const auto *Cl : Clauses) {
7865 const auto *C = dyn_cast<OMPFromClause>(Cl);
7866 if (!C)
7867 continue;
7868 MapKind Kind = Other;
7869 if (llvm::is_contained(C->getMotionModifiers(),
7870 OMPC_MOTION_MODIFIER_present))
7871 Kind = Present;
7872 const auto *EI = C->getVarRefs().begin();
7873 for (const auto L : C->component_lists()) {
7874 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7875 std::nullopt, C->getMotionModifiers(),
7876 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7877 *EI);
7878 ++EI;
7879 }
7880 }
7881
7882 // Look at the use_device_ptr and use_device_addr clause information and
7883 // mark the existing map entries as such. If there is no map information for
7884 // an entry in the use_device_ptr and use_device_addr lists, we create one
7885 // with map type 'alloc' and a zero-size section. It is the user's fault if
7886 // that was not mapped before. If there is no map information and the pointer
7887 // is a struct member, then we defer the emission of that entry until the
7888 // whole struct has been processed.
7889 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7890 SmallVector<DeferredDevicePtrEntryTy, 4>>
7891 DeferredInfo;
7892 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7893
7894 auto &&UseDeviceDataCombinedInfoGen =
7895 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7896 CodeGenFunction &CGF, bool IsDevAddr) {
7897 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7898 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7899 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7900 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7901 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7902 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7903 UseDeviceDataCombinedInfo.Sizes.push_back(
7904 llvm::Constant::getNullValue(CGF.Int64Ty));
7905 UseDeviceDataCombinedInfo.Types.push_back(
7906 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7907 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7908 };
7909
7910 auto &&MapInfoGen =
7911 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7912 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7913 OMPClauseMappableExprCommon::MappableExprComponentListRef
7914 Components,
7915 bool IsImplicit, bool IsDevAddr) {
7916 // We didn't find any match in our map information: generate a
7917 // zero-size array section. If the pointer is a struct member, we
7918 // defer this action until the whole struct has been processed.
7919 if (isa<MemberExpr>(IE)) {
7920 // Insert the pointer into Info to be processed by
7921 // generateInfoForComponentList. Because it is a member pointer
7922 // without a pointee, no entry will be generated for it; therefore
7923 // we need to generate one after the whole struct has been
7924 // processed. Nonetheless, generateInfoForComponentList must be
7925 // called to take the pointer into account for the calculation of
7926 // the range of the partial struct.
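// For example (illustrative): for use_device_ptr(s.p) with no explicit
// map of 's.p', the entry is deferred here until the enclosing struct 's'
// has been processed, whereas use_device_ptr(p) on a plain pointer emits
// its RETURN_PARAM entry immediately below.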
7927 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7928 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7929 nullptr, nullptr, IsDevAddr);
7930 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7931 } else {
7932 llvm::Value *Ptr;
7933 if (IsDevAddr) {
7934 if (IE->isGLValue())
7935 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7936 else
7937 Ptr = CGF.EmitScalarExpr(IE);
7938 } else {
7939 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7940 }
7941 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7942 }
7943 };
7944
7945 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7946 const Expr *IE, bool IsDevAddr) -> bool {
7947 // We potentially have map information for this declaration already.
7948 // Look for the first set of components that refer to it. If found,
7949 // return true.
7950 // If the first component is a member expression, we have to look into
7951 // 'this', which maps to null in the map of map information. Otherwise
7952 // look directly for the information.
7953 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7954 if (It != Info.end()) {
7955 bool Found = false;
7956 for (auto &Data : It->second) {
7957 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7958 return MI.Components.back().getAssociatedDeclaration() == VD;
7959 });
7960 // If we found a map entry, signal that the pointer has to be
7961 // returned and move on to the next declaration. Exclude cases where
7962 // the base pointer is mapped as array subscript, array section or
7963 // array shaping. The base address is passed as a pointer to base in
7964 // this case and cannot be used as a base for use_device_ptr list
7965 // item.
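// For example (illustrative): with map(to: p[0:n]) use_device_ptr(p),
// the base address of the section is passed as a pointer to base, so
// that map entry cannot serve the use_device_ptr item and a separate
// entry will be generated for 'p' later.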
7966 if (CI != Data.end()) {
7967 if (IsDevAddr) {
7968 CI->ForDeviceAddr = IsDevAddr;
7969 CI->ReturnDevicePointer = true;
7970 Found = true;
7971 break;
7972 } else {
7973 auto PrevCI = std::next(CI->Components.rbegin());
7974 const auto *VarD = dyn_cast<VarDecl>(VD);
7975 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7976 isa<MemberExpr>(IE) ||
7977 !VD->getType().getNonReferenceType()->isPointerType() ||
7978 PrevCI == CI->Components.rend() ||
7979 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7980 VarD->hasLocalStorage()) {
7981 CI->ForDeviceAddr = IsDevAddr;
7982 CI->ReturnDevicePointer = true;
7983 Found = true;
7984 break;
7985 }
7986 }
7987 }
7988 }
7989 return Found;
7990 }
7991 return false;
7992 };
7993
7994 // Look at the use_device_ptr clause information and mark the existing map
7995 // entries as such. If there is no map information for an entry in the
7996 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7997 // section. It is the user's fault if that was not mapped before. If there is
7998 // no map information and the pointer is a struct member, then we defer the
7999 // emission of that entry until the whole struct has been processed.
8000 for (const auto *Cl : Clauses) {
8001 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8002 if (!C)
8003 continue;
8004 for (const auto L : C->component_lists()) {
8005 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8006 std::get<1>(L);
8007 assert(!Components.empty() &&
8008 "Not expecting empty list of components!");
8009 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8010 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8011 const Expr *IE = Components.back().getAssociatedExpression();
8012 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8013 continue;
8014 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8015 /*IsDevAddr=*/false);
8016 }
8017 }
8018
8019 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8020 for (const auto *Cl : Clauses) {
8021 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8022 if (!C)
8023 continue;
8024 for (const auto L : C->component_lists()) {
8025 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8026 std::get<1>(L);
8027 assert(!std::get<1>(L).empty() &&
8028 "Not expecting empty list of components!");
8029 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8030 if (!Processed.insert(VD).second)
8031 continue;
8032 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8033 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8034 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8035 continue;
8036 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8037 /*IsDevAddr=*/true);
8038 }
8039 }
8040
8041 for (const auto &Data : Info) {
8042 StructRangeInfoTy PartialStruct;
8043 // Current struct information:
8044 MapCombinedInfoTy CurInfo;
8045 // Current struct base information:
8046 MapCombinedInfoTy StructBaseCurInfo;
8047 const Decl *D = Data.first;
8048 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8049 bool HasMapBasePtr = false;
8050 bool HasMapArraySec = false;
8051 if (VD && VD->getType()->isAnyPointerType()) {
8052 for (const auto &M : Data.second) {
8053 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8054 return isa_and_present<DeclRefExpr>(L.VarRef);
8055 });
8056 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8057 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8058 L.VarRef);
8059 });
8060 if (HasMapBasePtr && HasMapArraySec)
8061 break;
8062 }
8063 }
8064 for (const auto &M : Data.second) {
8065 for (const MapInfo &L : M) {
8066 assert(!L.Components.empty() &&
8067 "Not expecting declaration with no component lists.");
8068
8069 // Remember the current base pointer index.
8070 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8071 unsigned StructBasePointersIdx =
8072 StructBaseCurInfo.BasePointers.size();
8073 CurInfo.NonContigInfo.IsNonContiguous =
8074 L.Components.back().isNonContiguous();
8075 generateInfoForComponentList(
8076 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8077 CurInfo, StructBaseCurInfo, PartialStruct,
8078 /*IsFirstComponentList=*/false, L.IsImplicit,
8079 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8080 L.VarRef, /*OverlappedElements*/ std::nullopt,
8081 HasMapBasePtr && HasMapArraySec);
8082
8083 // If this entry relates to a device pointer, set the relevant
8084 // declaration and add the 'return pointer' flag.
8085 if (L.ReturnDevicePointer) {
8086 // Check whether a value was added to either CurInfo or
8087 // StructBaseCurInfo and error if no value was added to either of
8088 // them:
8089 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8090 StructBasePointersIdx <
8091 StructBaseCurInfo.BasePointers.size()) &&
8092 "Unexpected number of mapped base pointers.");
8093
8094 // Choose a base pointer index which is always valid:
8095 const ValueDecl *RelevantVD =
8096 L.Components.back().getAssociatedDeclaration();
8097 assert(RelevantVD &&
8098 "No relevant declaration related with device pointer??");
8099
8100 // If StructBaseCurInfo has been updated in this iteration, work on the
8101 // first new entry added to it, i.e. make sure that when multiple
8102 // values are added to any of the lists, the first value added is
8103 // the one modified by the assignments below (not the last value
8104 // added).
8105 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8106 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8107 RelevantVD;
8108 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8109 L.ForDeviceAddr ? DeviceInfoTy::Address
8110 : DeviceInfoTy::Pointer;
8111 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8112 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8113 } else {
8114 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8115 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8116 L.ForDeviceAddr ? DeviceInfoTy::Address
8117 : DeviceInfoTy::Pointer;
8118 CurInfo.Types[CurrentBasePointersIdx] |=
8119 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8120 }
8121 }
8122 }
8123 }
8124
8125 // Append any pending zero-length pointers which are struct members and
8126 // used with use_device_ptr or use_device_addr.
8127 auto CI = DeferredInfo.find(Data.first);
8128 if (CI != DeferredInfo.end()) {
8129 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8130 llvm::Value *BasePtr;
8131 llvm::Value *Ptr;
8132 if (L.ForDeviceAddr) {
8133 if (L.IE->isGLValue())
8134 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8135 else
8136 Ptr = this->CGF.EmitScalarExpr(L.IE);
8137 BasePtr = Ptr;
8138 // Entry is RETURN_PARAM. Also, set the placeholder value
8139 // MEMBER_OF=FFFF so that the entry is later updated with the
8140 // correct value of MEMBER_OF.
8141 CurInfo.Types.push_back(
8142 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8143 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8144 } else {
8145 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8146 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8147 L.IE->getExprLoc());
8148 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8149 // placeholder value MEMBER_OF=FFFF so that the entry is later
8150 // updated with the correct value of MEMBER_OF.
8151 CurInfo.Types.push_back(
8152 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8153 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8154 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8155 }
8156 CurInfo.Exprs.push_back(L.VD);
8157 CurInfo.BasePointers.emplace_back(BasePtr);
8158 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8159 CurInfo.DevicePointers.emplace_back(
8160 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8161 CurInfo.Pointers.push_back(Ptr);
8162 CurInfo.Sizes.push_back(
8163 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8164 CurInfo.Mappers.push_back(nullptr);
8165 }
8166 }
8167
8168 // Unify entries in one list, making sure the struct mapping precedes the
8169 // individual fields:
8170 MapCombinedInfoTy UnionCurInfo;
8171 UnionCurInfo.append(StructBaseCurInfo);
8172 UnionCurInfo.append(CurInfo);
8173
8174 // If there is an entry in PartialStruct it means we have a struct with
8175 // individual members mapped. Emit an extra combined entry.
8176 if (PartialStruct.Base.isValid()) {
8177 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8178 // Emit a combined entry:
8179 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8180 /*IsMapThis*/ !VD, OMPBuilder, VD);
8181 }
8182
8183 // We need to append the results of this capture to what we already have.
8184 CombinedInfo.append(UnionCurInfo);
8185 }
8186 // Append data for use_device_ptr clauses.
8187 CombinedInfo.append(UseDeviceDataCombinedInfo);
8188 }
8189
8190 public:
8191 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8192 : CurDir(&Dir), CGF(CGF) {
8193 // Extract firstprivate clause information.
8194 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8195 for (const auto *D : C->varlists())
8196 FirstPrivateDecls.try_emplace(
8197 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8198 // Extract implicit firstprivates from uses_allocators clauses.
8199 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8200 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8201 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8202 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8203 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8204 /*Implicit=*/true);
8205 else if (const auto *VD = dyn_cast<VarDecl>(
8206 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8207 ->getDecl()))
8208 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8209 }
8210 }
8211 // Extract device pointer clause information.
8212 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8213 for (auto L : C->component_lists())
8214 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8215 // Extract device addr clause information.
8216 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8217 for (auto L : C->component_lists())
8218 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8219 // Extract map information.
8220 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8221 if (C->getMapType() != OMPC_MAP_to)
8222 continue;
8223 for (auto L : C->component_lists()) {
8224 const ValueDecl *VD = std::get<0>(L);
8225 const auto *RD = VD ? VD->getType()
8226 .getCanonicalType()
8227 .getNonReferenceType()
8228 ->getAsCXXRecordDecl()
8229 : nullptr;
8230 if (RD && RD->isLambda())
8231 LambdasMap.try_emplace(std::get<0>(L), C);
8232 }
8233 }
8234 }
8235
8236 /// Constructor for the declare mapper directive.
8237 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8238 : CurDir(&Dir), CGF(CGF) {}
8239
8240 /// Generate code for the combined entry if we have a partially mapped struct
8241 /// and take care of the mapping flags of the arguments corresponding to
8242 /// individual struct members.
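///
/// A sketch of the effect (illustrative): for map(tofrom: s.x, s.y) a single
/// combined entry covering the range [&s.x, &s.y + 1) is emitted for 's',
/// and the individual entries for 's.x' and 's.y' are then tagged as
/// MEMBER_OF that combined entry.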
8243 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8244 MapFlagsArrayTy &CurTypes,
8245 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8246 llvm::OpenMPIRBuilder &OMPBuilder,
8247 const ValueDecl *VD = nullptr,
8248 bool NotTargetParams = true) const {
8249 if (CurTypes.size() == 1 &&
8250 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8251 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8252 !PartialStruct.IsArraySection)
8253 return;
8254 Address LBAddr = PartialStruct.LowestElem.second;
8255 Address HBAddr = PartialStruct.HighestElem.second;
8256 if (PartialStruct.HasCompleteRecord) {
8257 LBAddr = PartialStruct.LB;
8258 HBAddr = PartialStruct.LB;
8259 }
8260 CombinedInfo.Exprs.push_back(VD);
8261 // Base is the base of the struct
8262 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8263 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8264 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8265 // Pointer is the address of the lowest element
8266 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8267 const CXXMethodDecl *MD =
8268 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8269 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8270 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8271 // There should not be a mapper for a combined entry.
8272 if (HasBaseClass) {
8273 // OpenMP 5.2 148:21:
8274 // If the target construct is within a class non-static member function,
8275 // and a variable is an accessible data member of the object for which the
8276 // non-static member function is invoked, the variable is treated as
8277 // if the this[:1] expression had appeared in a map clause with a map-type
8278 // of tofrom.
8279 // Emit this[:1]
8280 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8281 QualType Ty = MD->getFunctionObjectParameterType();
8282 llvm::Value *Size =
8283 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8284 /*isSigned=*/true);
8285 CombinedInfo.Sizes.push_back(Size);
8286 } else {
8287 CombinedInfo.Pointers.push_back(LB);
8288 // Size is (addr of {highest+1} element) - (addr of lowest element)
8289 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8290 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8291 HBAddr.getElementType(), HB, /*Idx0=*/1);
8292 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8293 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8294 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8295 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8296 /*isSigned=*/false);
8297 CombinedInfo.Sizes.push_back(Size);
8298 }
8299 CombinedInfo.Mappers.push_back(nullptr);
8300 // The map type is always TARGET_PARAM when generating info for captures.
8301 CombinedInfo.Types.push_back(
8302 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8303 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8304 // If any element has the present modifier, then make sure the runtime
8305 // doesn't attempt to allocate the struct.
8306 if (CurTypes.end() !=
8307 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8308 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8309 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8310 }))
8311 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8312 // Remove TARGET_PARAM flag from the first element
8313 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8314 // If any element has the ompx_hold modifier, then make sure the runtime
8315 // uses the hold reference count for the struct as a whole so that it won't
8316 // be unmapped by an extra dynamic reference count decrement. Add it to all
8317 // elements as well so the runtime knows which reference count to check
8318 // when determining whether it's time for device-to-host transfers of
8319 // individual elements.
8320 if (CurTypes.end() !=
8321 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8322 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8323 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8324 })) {
8325 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8326 for (auto &M : CurTypes)
8327 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8328 }
8329
8330 // All other current entries will be MEMBER_OF the combined entry
8331 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8332 // 0xFFFF in the MEMBER_OF field).
8333 OpenMPOffloadMappingFlags MemberOfFlag =
8334 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8335 for (auto &M : CurTypes)
8336 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8337 }
8338
8339 /// Generate all the base pointers, section pointers, sizes, map types, and
8340 /// mappers for the extracted mappable expressions (all included in \a
8341 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8342 /// pair of the relevant declaration and the index where it occurs is appended
8343 /// to the device pointers info array.
8344 void generateAllInfo(
8345 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8346 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8347 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8348 assert(CurDir.is<const OMPExecutableDirective *>() &&
8349 "Expect a executable directive");
8350 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8351 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8352 SkipVarSet);
8353 }
8354
8355 /// Generate all the base pointers, section pointers, sizes, map types, and
8356 /// mappers for the extracted map clauses of a user-defined mapper (all
8357 /// included in \a CombinedInfo).
8358 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8359 llvm::OpenMPIRBuilder &OMPBuilder) const {
8360 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8361 "Expect a declare mapper directive");
8362 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8363 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8364 OMPBuilder);
8365 }
8366
8367 /// Emit capture info for variables captured by reference in lambdas.
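///
/// For illustration (hypothetical variables):
/// \code
/// int x; auto l = [&x]() { return x; };
/// #pragma omp target map(to: l)
/// \endcode
/// emits an entry mapping the storage captured for 'x' (and for 'this', when
/// captured) with PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT.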
8368 void generateInfoForLambdaCaptures(
8369 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8370 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8371 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8372 const auto *RD = VDType->getAsCXXRecordDecl();
8373 if (!RD || !RD->isLambda())
8374 return;
8375 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8376 CGF.getContext().getDeclAlign(VD));
8377 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8378 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8379 FieldDecl *ThisCapture = nullptr;
8380 RD->getCaptureFields(Captures, ThisCapture);
8381 if (ThisCapture) {
8382 LValue ThisLVal =
8383 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8384 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8385 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8386 VDLVal.getPointer(CGF));
8387 CombinedInfo.Exprs.push_back(VD);
8388 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8389 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8390 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8391 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8392 CombinedInfo.Sizes.push_back(
8393 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8394 CGF.Int64Ty, /*isSigned=*/true));
8395 CombinedInfo.Types.push_back(
8396 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8397 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8398 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8399 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8400 CombinedInfo.Mappers.push_back(nullptr);
8401 }
8402 for (const LambdaCapture &LC : RD->captures()) {
8403 if (!LC.capturesVariable())
8404 continue;
8405 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8406 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8407 continue;
8408 auto It = Captures.find(VD);
8409 assert(It != Captures.end() && "Found lambda capture without field.");
8410 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8411 if (LC.getCaptureKind() == LCK_ByRef) {
8412 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8413 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8414 VDLVal.getPointer(CGF));
8415 CombinedInfo.Exprs.push_back(VD);
8416 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8417 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8418 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8419 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8420 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8421 CGF.getTypeSize(
8422 VD->getType().getCanonicalType().getNonReferenceType()),
8423 CGF.Int64Ty, /*isSigned=*/true));
8424 } else {
8425 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8426 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8427 VDLVal.getPointer(CGF));
8428 CombinedInfo.Exprs.push_back(VD);
8429 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8430 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8431 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8432 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8433 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8434 }
8435 CombinedInfo.Types.push_back(
8436 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8437 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8438 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8439 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8440 CombinedInfo.Mappers.push_back(nullptr);
8441 }
8442 }
8443
8444 /// Set correct indices for lambda captures.
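///
/// For example (illustrative): if the lambda object itself was emitted as
/// entry N of the combined arrays, each of its capture entries has its
/// placeholder MEMBER_OF value replaced with MEMBER_OF(N) here.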
8445 void adjustMemberOfForLambdaCaptures(
8446 llvm::OpenMPIRBuilder &OMPBuilder,
8447 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8448 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8449 MapFlagsArrayTy &Types) const {
8450 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8451 // Set correct member_of idx for all implicit lambda captures.
8452 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8453 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8454 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8455 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8456 continue;
8457 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8458 assert(BasePtr && "Unable to find base lambda address.");
8459 int TgtIdx = -1;
8460 for (unsigned J = I; J > 0; --J) {
8461 unsigned Idx = J - 1;
8462 if (Pointers[Idx] != BasePtr)
8463 continue;
8464 TgtIdx = Idx;
8465 break;
8466 }
8467 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8468 // All other current entries will be MEMBER_OF the combined entry
8469 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8470 // 0xFFFF in the MEMBER_OF field).
8471 OpenMPOffloadMappingFlags MemberOfFlag =
8472 OMPBuilder.getMemberOfFlag(TgtIdx);
8473 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8474 }
8475 }
8476
8477 /// Generate the base pointers, section pointers, sizes, map types, and
8478 /// mappers associated with a given capture (all included in \a CombinedInfo).
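///
/// For illustration (hypothetical variable):
/// \code
/// #pragma omp target map(tofrom: s) map(to: s.x)
/// \endcode
/// associates both component lists with the capture of 's'; the overlap of
/// 's.x' with 's' is detected below so that the member is not mapped twice.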
8479 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8480 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8481 StructRangeInfoTy &PartialStruct) const {
8482 assert(!Cap->capturesVariableArrayType() &&
8483 "Not expecting to generate map info for a variable array type!");
8484
8485 // We need to know when we are generating information for the first component.
8486 const ValueDecl *VD = Cap->capturesThis()
8487 ? nullptr
8488 : Cap->getCapturedVar()->getCanonicalDecl();
8489
8490 // For map(to: lambda): skip it here; it is processed in
8491 // generateDefaultMapInfo.
8492 if (LambdasMap.count(VD))
8493 return;
8494
8495 // If this declaration appears in an is_device_ptr clause, we just have to
8496 // pass the pointer by value. If it is a reference to a declaration, we just
8497 // pass its value.
8498 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8499 CombinedInfo.Exprs.push_back(VD);
8500 CombinedInfo.BasePointers.emplace_back(Arg);
8501 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8502 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8503 CombinedInfo.Pointers.push_back(Arg);
8504 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8505 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8506 /*isSigned=*/true));
8507 CombinedInfo.Types.push_back(
8508 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8509 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8510 CombinedInfo.Mappers.push_back(nullptr);
8511 return;
8512 }
8513
8514 using MapData =
8515 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8516 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8517 const ValueDecl *, const Expr *>;
8518 SmallVector<MapData, 4> DeclComponentLists;
8519 // For member field lists in is_device_ptr clauses, store them in
8520 // DeclComponentLists for generating component info.
8521 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8522 auto It = DevPointersMap.find(VD);
8523 if (It != DevPointersMap.end())
8524 for (const auto &MCL : It->second)
8525 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8526 /*IsImplicit=*/true, nullptr,
8527 nullptr);
8528 auto I = HasDevAddrsMap.find(VD);
8529 if (I != HasDevAddrsMap.end())
8530 for (const auto &MCL : I->second)
8531 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8532 /*IsImplicit=*/true, nullptr,
8533 nullptr);
8534 assert(CurDir.is<const OMPExecutableDirective *>() &&
8535 "Expect a executable directive");
8536 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8537 bool HasMapBasePtr = false;
8538 bool HasMapArraySec = false;
8539 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8540 const auto *EI = C->getVarRefs().begin();
8541 for (const auto L : C->decl_component_lists(VD)) {
8542 const ValueDecl *VDecl, *Mapper;
8543 // The expression is not valid if the mapping is implicit.
8544 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8545 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8546 std::tie(VDecl, Components, Mapper) = L;
8547 assert(VDecl == VD && "We got information for the wrong declaration??");
8548 assert(!Components.empty() &&
8549 "Not expecting declaration with no component lists.");
8550 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8551 HasMapBasePtr = true;
8552 if (VD && E && VD->getType()->isAnyPointerType() &&
8553 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8554 HasMapArraySec = true;
8555 DeclComponentLists.emplace_back(Components, C->getMapType(),
8556 C->getMapTypeModifiers(),
8557 C->isImplicit(), Mapper, E);
8558 ++EI;
8559 }
8560 }
8561 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8562 const MapData &RHS) {
8563 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8564 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8565 bool HasPresent =
8566 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8567 bool HasAllocs = MapType == OMPC_MAP_alloc;
8568 MapModifiers = std::get<2>(RHS);
8569 MapType = std::get<1>(LHS);
8570 bool HasPresentR =
8571 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8572 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8573 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8574 });
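// For example (illustrative): the sort above orders map(present, to: s.x)
// and map(alloc: s.y) before a plain map(to: s.z), so that 'present'
// checks and allocations are processed first.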
8575
8576 // Find overlapping elements (including the offset from the base element).
8577 llvm::SmallDenseMap<
8578 const MapData *,
8579 llvm::SmallVector<
8580 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8581 4>
8582 OverlappedData;
8583 size_t Count = 0;
8584 for (const MapData &L : DeclComponentLists) {
8585 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8586 OpenMPMapClauseKind MapType;
8587 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8588 bool IsImplicit;
8589 const ValueDecl *Mapper;
8590 const Expr *VarRef;
8591 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8592 L;
8593 ++Count;
8594 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8595 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8596 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8597 VarRef) = L1;
8598 auto CI = Components.rbegin();
8599 auto CE = Components.rend();
8600 auto SI = Components1.rbegin();
8601 auto SE = Components1.rend();
8602 for (; CI != CE && SI != SE; ++CI, ++SI) {
8603 if (CI->getAssociatedExpression()->getStmtClass() !=
8604 SI->getAssociatedExpression()->getStmtClass())
8605 break;
8606 // Are we dealing with different variables/fields?
8607 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8608 break;
8609 }
8610 // We found an overlap if, for at least one of the lists, we reached
8611 // the head of its components list.
8612 if (CI == CE || SI == SE) {
8613 // Ignore it if it is the same component.
8614 if (CI == CE && SI == SE)
8615 continue;
8616 const auto It = (SI == SE) ? CI : SI;
8617 // If one component is a pointer and the other one is a kind of
8618 // dereference of this pointer (array subscript, section, dereference,
8619 // etc.), it is not an overlap.
8620 // The same holds if one component is a base and the other component is
8621 // a dereferenced pointer MemberExpr with the same base.
8622 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8623 (std::prev(It)->getAssociatedDeclaration() &&
8624 std::prev(It)
8625 ->getAssociatedDeclaration()
8626 ->getType()
8627 ->isPointerType()) ||
8628 (It->getAssociatedDeclaration() &&
8629 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8630 std::next(It) != CE && std::next(It) != SE))
8631 continue;
8632 const MapData &BaseData = CI == CE ? L : L1;
8633 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8634 SI == SE ? Components : Components1;
8635 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8636 OverlappedElements.getSecond().push_back(SubData);
8637 }
8638 }
8639 }
8640 // Sort the overlapped elements for each item.
8641 llvm::SmallVector<const FieldDecl *, 4> Layout;
8642 if (!OverlappedData.empty()) {
8643 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8644 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8645 while (BaseType != OrigType) {
8646 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8647 OrigType = BaseType->getPointeeOrArrayElementType();
8648 }
8649
8650 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8651 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8652 else {
8653 const auto *RD = BaseType->getAsRecordDecl();
8654 Layout.append(RD->field_begin(), RD->field_end());
8655 }
8656 }
8657 for (auto &Pair : OverlappedData) {
8658 llvm::stable_sort(
8659 Pair.getSecond(),
8660 [&Layout](
8661 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8662 OMPClauseMappableExprCommon::MappableExprComponentListRef
8663 Second) {
8664 auto CI = First.rbegin();
8665 auto CE = First.rend();
8666 auto SI = Second.rbegin();
8667 auto SE = Second.rend();
8668 for (; CI != CE && SI != SE; ++CI, ++SI) {
8669 if (CI->getAssociatedExpression()->getStmtClass() !=
8670 SI->getAssociatedExpression()->getStmtClass())
8671 break;
8672 // Are we dealing with different variables/fields?
8673 if (CI->getAssociatedDeclaration() !=
8674 SI->getAssociatedDeclaration())
8675 break;
8676 }
8677
8678 // Lists contain the same elements.
8679 if (CI == CE && SI == SE)
8680 return false;
8681
8682 // A list with fewer elements is less than a list with more elements.
8683 if (CI == CE || SI == SE)
8684 return CI == CE;
8685
8686 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8687 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8688 if (FD1->getParent() == FD2->getParent())
8689 return FD1->getFieldIndex() < FD2->getFieldIndex();
8690 const auto *It =
8691 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8692 return FD == FD1 || FD == FD2;
8693 });
8694 return *It == FD1;
8695 });
8696 }
8697
8698 // Associated with a capture, because the mapping flags depend on it.
8699 // First, go through all of the elements that have overlapped elements.
8700 bool IsFirstComponentList = true;
8701 MapCombinedInfoTy StructBaseCombinedInfo;
8702 for (const auto &Pair : OverlappedData) {
8703 const MapData &L = *Pair.getFirst();
8704 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8705 OpenMPMapClauseKind MapType;
8706 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8707 bool IsImplicit;
8708 const ValueDecl *Mapper;
8709 const Expr *VarRef;
8710 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8711 L;
8712 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8713 OverlappedComponents = Pair.getSecond();
8714 generateInfoForComponentList(
8715 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8716 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8717 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8718 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8719 IsFirstComponentList = false;
8720 }
8721 // Then go through the remaining elements, those without overlapped elements.
8722 for (const MapData &L : DeclComponentLists) {
8723 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8724 OpenMPMapClauseKind MapType;
8725 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8726 bool IsImplicit;
8727 const ValueDecl *Mapper;
8728 const Expr *VarRef;
8729 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8730 L;
8731 auto It = OverlappedData.find(&L);
8732 if (It == OverlappedData.end())
8733 generateInfoForComponentList(
8734 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8735 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8736 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8737 /*ForDeviceAddr=*/false, VD, VarRef,
8738 /*OverlappedElements*/ std::nullopt,
8739 HasMapBasePtr && HasMapArraySec);
8740 IsFirstComponentList = false;
8741 }
8742 }
8743
8744 /// Generate the default map information for a given capture \a CI,
8745 /// record field declaration \a RI and captured value \a CV.
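///
/// For illustration (hypothetical variables), in
/// \code
/// int n; double *p;
/// #pragma omp target
/// { /* uses n and p */ }
/// \endcode
/// 'n' is captured by copy and mapped as LITERAL with its real size, while
/// the pointer 'p' gets an implicit zero-size entry; both entries are marked
/// TARGET_PARAM, plus IMPLICIT when not listed in an explicit clause.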
8746 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8747 const FieldDecl &RI, llvm::Value *CV,
8748 MapCombinedInfoTy &CombinedInfo) const {
8749 bool IsImplicit = true;
8750 // Do the default mapping.
8751 if (CI.capturesThis()) {
8752 CombinedInfo.Exprs.push_back(nullptr);
8753 CombinedInfo.BasePointers.push_back(CV);
8754 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8755 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8756 CombinedInfo.Pointers.push_back(CV);
8757 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8758 CombinedInfo.Sizes.push_back(
8759 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8760 CGF.Int64Ty, /*isSigned=*/true));
8761 // Default map type.
8762 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8763 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8764 } else if (CI.capturesVariableByCopy()) {
8765 const VarDecl *VD = CI.getCapturedVar();
8766 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8767 CombinedInfo.BasePointers.push_back(CV);
8768 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8769 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8770 CombinedInfo.Pointers.push_back(CV);
8771 if (!RI.getType()->isAnyPointerType()) {
8772 // We have to signal to the runtime which captures passed by value are
8773 // not pointers.
8774 CombinedInfo.Types.push_back(
8775 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8776 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8777 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8778 } else {
8779 // Pointers are implicitly mapped with a zero size and no flags
8780 // (other than first map that is added for all implicit maps).
8781 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8782 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8783 }
8784 auto I = FirstPrivateDecls.find(VD);
8785 if (I != FirstPrivateDecls.end())
8786 IsImplicit = I->getSecond();
8787 } else {
8788 assert(CI.capturesVariable() && "Expected captured reference.");
8789 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8790 QualType ElementType = PtrTy->getPointeeType();
8791 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8792 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8793 // The default map type for a scalar/complex type is 'to' because by
8794 // default the value doesn't have to be retrieved. For an aggregate
8795 // type, the default is 'tofrom'.
8796 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8797 const VarDecl *VD = CI.getCapturedVar();
8798 auto I = FirstPrivateDecls.find(VD);
8799 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8800 CombinedInfo.BasePointers.push_back(CV);
8801 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8802 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8803 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8804 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8805 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8806 AlignmentSource::Decl));
8807 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8808 } else {
8809 CombinedInfo.Pointers.push_back(CV);
8810 }
8811 if (I != FirstPrivateDecls.end())
8812 IsImplicit = I->getSecond();
8813 }
8814 // Every default map produces a single argument which is a target parameter.
8815 CombinedInfo.Types.back() |=
8816 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8817
8818 // Add flag stating this is an implicit map.
8819 if (IsImplicit)
8820 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8821
8822 // No user-defined mapper for default mapping.
8823 CombinedInfo.Mappers.push_back(nullptr);
8824 }
8825 };
8826 } // anonymous namespace
8827
8828 // Try to extract the base declaration from a `this->x` expression if possible.
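// For example (illustrative): for map(tofrom: this->arr[0:n]) this returns
// the declaration of the member 'arr'; for any other expression shape it
// returns nullptr.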
8829 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8830 if (!E)
8831 return nullptr;
8832
8833 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8834 if (const MemberExpr *ME =
8835 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8836 return ME->getMemberDecl();
8837 return nullptr;
8838 }
8839
8840 /// Emit a string constant containing the names of the values mapped to the
8841 /// offloading runtime library.
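/// The string combines the pretty-printed map expression (or, failing that,
/// the declaration name) with its presumed file, line, and column, e.g.
/// (illustratively) ";t.cpp;s.p[0:n];12;7;;".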
8842 llvm::Constant *
8843 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8844 MappableExprsHandler::MappingExprInfo &MapExprs) {
8845
8846 uint32_t SrcLocStrSize;
8847 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8848 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8849
8850 SourceLocation Loc;
8851 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8852 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8853 Loc = VD->getLocation();
8854 else
8855 Loc = MapExprs.getMapExpr()->getExprLoc();
8856 } else {
8857 Loc = MapExprs.getMapDecl()->getLocation();
8858 }
8859
8860 std::string ExprName;
8861 if (MapExprs.getMapExpr()) {
8862 PrintingPolicy P(CGF.getContext().getLangOpts());
8863 llvm::raw_string_ostream OS(ExprName);
8864 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8865 OS.flush();
8866 } else {
8867 ExprName = MapExprs.getMapDecl()->getNameAsString();
8868 }
8869
8870 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8871 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8872 PLoc.getLine(), PLoc.getColumn(),
8873 SrcLocStrSize);
8874 }
8875
8876 /// Emit the arrays used to pass the captures and map information to the
8877 /// offloading runtime library. If there is no map or capture information,
8878 /// return nullptr by reference.
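///
/// A minimal sketch of what the runtime ends up receiving (illustrative,
/// assuming two entries):
/// \code
/// void *baseptrs[2]; void *ptrs[2];
/// int64_t sizes[2];
/// int64_t maptypes[2]; // e.g. TO | TARGET_PARAM, TOFROM | TARGET_PARAM
/// \endcode
/// plus optional per-entry name and mapper arrays.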
8879 static void emitOffloadingArrays(
8880 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8881 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8882 bool IsNonContiguous = false) {
8883 CodeGenModule &CGM = CGF.CGM;
8884
8885 // Reset the array information.
8886 Info.clearArrayInfo();
8887 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8888
8889 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8890 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8891 CGF.AllocaInsertPt->getIterator());
8892 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8893 CGF.Builder.GetInsertPoint());
8894
8895 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8896 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8897 };
8898 if (CGM.getCodeGenOpts().getDebugInfo() !=
8899 llvm::codegenoptions::NoDebugInfo) {
8900 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8901 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8902 FillInfoMap);
8903 }
8904
8905 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8906 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8907 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8908 }
8909 };
8910
8911 auto CustomMapperCB = [&](unsigned int I) {
8912 llvm::Value *MFunc = nullptr;
8913 if (CombinedInfo.Mappers[I]) {
8914 Info.HasMapper = true;
8915 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8916 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8917 }
8918 return MFunc;
8919 };
8920 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8921 /*IsNonContiguous=*/true, DeviceAddrCB,
8922 CustomMapperCB);
8923 }
8924
8925 /// Check for inner distribute directive.
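/// For example (illustrative):
/// \code
/// #pragma omp target
/// #pragma omp teams
/// #pragma omp distribute
/// for (int i = 0; i < n; ++i) {}
/// \endcode
/// returns the nested 'distribute' directive.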
8926 static const OMPExecutableDirective *
8927 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8928 const auto *CS = D.getInnermostCapturedStmt();
8929 const auto *Body =
8930 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8931 const Stmt *ChildStmt =
8932 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8933
8934 if (const auto *NestedDir =
8935 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8936 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8937 switch (D.getDirectiveKind()) {
8938 case OMPD_target:
8939 // For now, treat 'target' with nested 'teams loop' as if it's
8940 // distributed (target teams distribute).
8941 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8942 return NestedDir;
8943 if (DKind == OMPD_teams) {
8944 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8945 /*IgnoreCaptured=*/true);
8946 if (!Body)
8947 return nullptr;
8948 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8949 if (const auto *NND =
8950 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8951 DKind = NND->getDirectiveKind();
8952 if (isOpenMPDistributeDirective(DKind))
8953 return NND;
8954 }
8955 }
8956 return nullptr;
8957 case OMPD_target_teams:
8958 if (isOpenMPDistributeDirective(DKind))
8959 return NestedDir;
8960 return nullptr;
8961 case OMPD_target_parallel:
8962 case OMPD_target_simd:
8963 case OMPD_target_parallel_for:
8964 case OMPD_target_parallel_for_simd:
8965 return nullptr;
8966 case OMPD_target_teams_distribute:
8967 case OMPD_target_teams_distribute_simd:
8968 case OMPD_target_teams_distribute_parallel_for:
8969 case OMPD_target_teams_distribute_parallel_for_simd:
8970 case OMPD_parallel:
8971 case OMPD_for:
8972 case OMPD_parallel_for:
8973 case OMPD_parallel_master:
8974 case OMPD_parallel_sections:
8975 case OMPD_for_simd:
8976 case OMPD_parallel_for_simd:
8977 case OMPD_cancel:
8978 case OMPD_cancellation_point:
8979 case OMPD_ordered:
8980 case OMPD_threadprivate:
8981 case OMPD_allocate:
8982 case OMPD_task:
8983 case OMPD_simd:
8984 case OMPD_tile:
8985 case OMPD_unroll:
8986 case OMPD_sections:
8987 case OMPD_section:
8988 case OMPD_single:
8989 case OMPD_master:
8990 case OMPD_critical:
8991 case OMPD_taskyield:
8992 case OMPD_barrier:
8993 case OMPD_taskwait:
8994 case OMPD_taskgroup:
8995 case OMPD_atomic:
8996 case OMPD_flush:
8997 case OMPD_depobj:
8998 case OMPD_scan:
8999 case OMPD_teams:
9000 case OMPD_target_data:
9001 case OMPD_target_exit_data:
9002 case OMPD_target_enter_data:
9003 case OMPD_distribute:
9004 case OMPD_distribute_simd:
9005 case OMPD_distribute_parallel_for:
9006 case OMPD_distribute_parallel_for_simd:
9007 case OMPD_teams_distribute:
9008 case OMPD_teams_distribute_simd:
9009 case OMPD_teams_distribute_parallel_for:
9010 case OMPD_teams_distribute_parallel_for_simd:
9011 case OMPD_target_update:
9012 case OMPD_declare_simd:
9013 case OMPD_declare_variant:
9014 case OMPD_begin_declare_variant:
9015 case OMPD_end_declare_variant:
9016 case OMPD_declare_target:
9017 case OMPD_end_declare_target:
9018 case OMPD_declare_reduction:
9019 case OMPD_declare_mapper:
9020 case OMPD_taskloop:
9021 case OMPD_taskloop_simd:
9022 case OMPD_master_taskloop:
9023 case OMPD_master_taskloop_simd:
9024 case OMPD_parallel_master_taskloop:
9025 case OMPD_parallel_master_taskloop_simd:
9026 case OMPD_requires:
9027 case OMPD_metadirective:
9028 case OMPD_unknown:
9029 default:
9030 llvm_unreachable("Unexpected directive.");
9031 }
9032 }
9033
9034 return nullptr;
9035 }
9036
9037 /// Emit the user-defined mapper function. The code generation follows the
9038 /// pattern in the example below.
9039 /// \code
9040 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9041 /// void *base, void *begin,
9042 /// int64_t size, int64_t type,
9043 /// void *name = nullptr) {
9044 /// // Allocate space for an array section first or add a base/begin for
9045 /// // pointer dereference.
9046 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9047 /// !maptype.IsDelete)
9048 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9049 /// size*sizeof(Ty), clearToFromMember(type));
9050 /// // Map members.
9051 /// for (unsigned i = 0; i < size; i++) {
9052 /// // For each component specified by this mapper:
9053 /// for (auto c : begin[i]->all_components) {
9054 /// if (c.hasMapper())
9055 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9056 /// c.arg_type, c.arg_name);
9057 /// else
9058 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9059 /// c.arg_begin, c.arg_size, c.arg_type,
9060 /// c.arg_name);
9061 /// }
9062 /// }
9063 /// // Delete the array section.
9064 /// if (size > 1 && maptype.IsDelete)
9065 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9066 /// size*sizeof(Ty), clearToFromMember(type));
9067 /// }
9068 /// \endcode
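///
/// For reference, a user-level mapper that is lowered through this routine
/// could be declared as follows (illustrative only; the type and mapper
/// names are hypothetical):
/// \code
/// struct vec { int len; double *data; };
/// #pragma omp declare mapper(id : vec v) map(v, v.data[0:v.len])
/// \endcode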
9069 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9070 CodeGenFunction *CGF) {
9071 if (UDMMap.count(D) > 0)
9072 return;
9073 ASTContext &C = CGM.getContext();
9074 QualType Ty = D->getType();
9075 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9076 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9077 auto *MapperVarDecl =
9078 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9079 SourceLocation Loc = D->getLocation();
9080 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9081 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9082
9083 // Prepare mapper function arguments and attributes.
9084 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9085 C.VoidPtrTy, ImplicitParamKind::Other);
9086 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9087 ImplicitParamKind::Other);
9088 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9089 C.VoidPtrTy, ImplicitParamKind::Other);
9090 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9091 ImplicitParamKind::Other);
9092 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9093 ImplicitParamKind::Other);
9094 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9095 ImplicitParamKind::Other);
9096 FunctionArgList Args;
9097 Args.push_back(&HandleArg);
9098 Args.push_back(&BaseArg);
9099 Args.push_back(&BeginArg);
9100 Args.push_back(&SizeArg);
9101 Args.push_back(&TypeArg);
9102 Args.push_back(&NameArg);
9103 const CGFunctionInfo &FnInfo =
9104 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9105 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9106 SmallString<64> TyStr;
9107 llvm::raw_svector_ostream Out(TyStr);
9108 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9109 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9110 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9111 Name, &CGM.getModule());
9112 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9113 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9114 // Start the mapper function code generation.
9115 CodeGenFunction MapperCGF(CGM);
9116 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9117 // Compute the starting and end addresses of array elements.
9118 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9119 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9120 C.getPointerType(Int64Ty), Loc);
9121 // Prepare common arguments for array initialization and deletion.
9122 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9123 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9124 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9125 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9126 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9127 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9128 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9129 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9130 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9131 // Convert the size in bytes into the number of array elements.
9132 Size = MapperCGF.Builder.CreateExactUDiv(
9133 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
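// For instance, with a hypothetical 16-byte element type, a 48-byte section
// yields Size == 3 here; CreateExactUDiv encodes the invariant that the
// runtime passes a byte size that is an exact multiple of the element size.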
9134 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9135 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9136 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9137 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9138 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9139 C.getPointerType(Int64Ty), Loc);
9140 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9141 MapperCGF.GetAddrOfLocalVar(&NameArg),
9142 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9143
9144 // Emit array initialization if this is an array section and \p MapType
9145 // indicates that memory allocation is required.
9146 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9147 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9148 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9149
9150 // Emit a for loop to iterate through the SizeArg elements and map all of them.
9151
9152 // Emit the loop header block.
9153 MapperCGF.EmitBlock(HeadBB);
9154 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9155 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9156 // Evaluate whether the initial condition is satisfied.
9157 llvm::Value *IsEmpty =
9158 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9159 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9160 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9161
9162 // Emit the loop body block.
9163 MapperCGF.EmitBlock(BodyBB);
9164 llvm::BasicBlock *LastBB = BodyBB;
9165 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9166 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9167 PtrPHI->addIncoming(PtrBegin, EntryBB);
9168 Address PtrCurrent(PtrPHI, ElemTy,
9169 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9170 .getAlignment()
9171 .alignmentOfArrayElement(ElementSize));
9172 // Privatize the declared variable of mapper to be the current array element.
9173 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9174 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9175 (void)Scope.Privatize();
9176
9177 // Get map clause information. Fill up the arrays with all mapped variables.
9178 MappableExprsHandler::MapCombinedInfoTy Info;
9179 MappableExprsHandler MEHandler(*D, MapperCGF);
9180 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9181
9182 // Call the runtime API __tgt_mapper_num_components to get the number of
9183 // pre-existing components.
9184 llvm::Value *OffloadingArgs[] = {Handle};
9185 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9186 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9187 OMPRTL___tgt_mapper_num_components),
9188 OffloadingArgs);
9189 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9190 PreviousSize,
9191 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
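// Illustrative example: with getFlagMemberOffset() == 48 and a PreviousSize
// of 2, ShiftedPreviousSize holds 2 in the MEMBER_OF bit-field; adding it
// below rebases this mapper's member indices past the components the caller
// has already pushed.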
9192
9193 // Fill up the runtime mapper handle for all components.
9194 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9195 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9196 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9197 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9198 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9199 llvm::Value *CurSizeArg = Info.Sizes[I];
9200 llvm::Value *CurNameArg =
9201 (CGM.getCodeGenOpts().getDebugInfo() ==
9202 llvm::codegenoptions::NoDebugInfo)
9203 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9204 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9205
9206 // Extract the MEMBER_OF field from the map type.
9207 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9208 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9209 Info.Types[I]));
9210 llvm::Value *MemberMapType =
9211 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9212
9213 // Combine the map type inherited from user-defined mapper with that
9214 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9215 // bits of the \a MapType, which is the input argument of the mapper
9216 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9217 // bits of MemberMapType.
9218 // [OpenMP 5.0], 1.2.6. map-type decay.
9219 // | alloc | to | from | tofrom | release | delete
9220 // ----------------------------------------------------------
9221 // alloc | alloc | alloc | alloc | alloc | release | delete
9222 // to | alloc | to | alloc | to | release | delete
9223 // from | alloc | alloc | from | from | release | delete
9224 // tofrom | alloc | to | from | tofrom | release | delete
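// As a concrete reading of the table: if a member was declared with
// map(to: ...) in the mapper but the call site maps the aggregate with
// map(from: ...), the member decays to alloc, i.e. device storage is
// created but no data is copied in either direction.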
9225 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9226 MapType,
9227 MapperCGF.Builder.getInt64(
9228 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9229 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9230 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9231 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9232 llvm::BasicBlock *AllocElseBB =
9233 MapperCGF.createBasicBlock("omp.type.alloc.else");
9234 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9235 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9236 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9237 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9238 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9239 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9240 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9241 MapperCGF.EmitBlock(AllocBB);
9242 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9243 MemberMapType,
9244 MapperCGF.Builder.getInt64(
9245 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9246 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9247 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9248 MapperCGF.Builder.CreateBr(EndBB);
9249 MapperCGF.EmitBlock(AllocElseBB);
9250 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9251 LeftToFrom,
9252 MapperCGF.Builder.getInt64(
9253 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9254 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9255 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9256 // In case of to, clear OMP_MAP_FROM.
9257 MapperCGF.EmitBlock(ToBB);
9258 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9259 MemberMapType,
9260 MapperCGF.Builder.getInt64(
9261 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9262 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9263 MapperCGF.Builder.CreateBr(EndBB);
9264 MapperCGF.EmitBlock(ToElseBB);
9265 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9266 LeftToFrom,
9267 MapperCGF.Builder.getInt64(
9268 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9269 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9270 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9271 // In case of from, clear OMP_MAP_TO.
9272 MapperCGF.EmitBlock(FromBB);
9273 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9274 MemberMapType,
9275 MapperCGF.Builder.getInt64(
9276 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9277 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9278 // In case of tofrom, do nothing.
9279 MapperCGF.EmitBlock(EndBB);
9280 LastBB = EndBB;
9281 llvm::PHINode *CurMapType =
9282 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9283 CurMapType->addIncoming(AllocMapType, AllocBB);
9284 CurMapType->addIncoming(ToMapType, ToBB);
9285 CurMapType->addIncoming(FromMapType, FromBB);
9286 CurMapType->addIncoming(MemberMapType, ToElseBB);
9287
9288 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9289 CurSizeArg, CurMapType, CurNameArg};
9290 if (Info.Mappers[I]) {
9291 // Call the corresponding mapper function.
9292 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9293 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9294 assert(MapperFunc && "Expect a valid mapper function is available.");
9295 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9296 } else {
9297 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9298 // data structure.
9299 MapperCGF.EmitRuntimeCall(
9300 OMPBuilder.getOrCreateRuntimeFunction(
9301 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9302 OffloadingArgs);
9303 }
9304 }
9305
9306 // Update the pointer to point to the next element that needs to be mapped,
9307 // and check whether we have mapped all elements.
9308 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9309 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9310 PtrPHI->addIncoming(PtrNext, LastBB);
9311 llvm::Value *IsDone =
9312 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9313 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9314 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9315
9316 MapperCGF.EmitBlock(ExitBB);
9317 // Emit array deletion if this is an array section and \p MapType indicates
9318 // that deletion is required.
9319 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9320 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9321
9322 // Emit the function exit block.
9323 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9324 MapperCGF.FinishFunction();
9325 UDMMap.try_emplace(D, Fn);
9326 if (CGF) {
9327 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9328 Decls.second.push_back(D);
9329 }
9330 }
9331
9332 /// Emit the array initialization or deletion portion for user-defined mapper
9333 /// code generation. First, it evaluates whether an array section is mapped and
9334 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9335 /// true, and \a MapType indicates to not delete this array, array
9336 /// initialization code is generated. If \a IsInit is false, and \a MapType
9337 /// indicates to delete this array, array deletion code is generated.
9338 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9339 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9340 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9341 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9342 bool IsInit) {
9343 StringRef Prefix = IsInit ? ".init" : ".del";
9344
9345 // Evaluate if this is an array section.
9346 llvm::BasicBlock *BodyBB =
9347 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9348 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9349 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9350 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9351 MapType,
9352 MapperCGF.Builder.getInt64(
9353 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9354 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9355 llvm::Value *DeleteCond;
9356 llvm::Value *Cond;
9357 if (IsInit) {
9358 // base != begin?
9359 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9360 // IsPtrAndObj?
9361 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9362 MapType,
9363 MapperCGF.Builder.getInt64(
9364 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9365 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9366 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9367 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9368 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9369 DeleteCond = MapperCGF.Builder.CreateIsNull(
9370 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9371 } else {
9372 Cond = IsArray;
9373 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9374 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9375 }
9376 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9377 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9378
9379 MapperCGF.EmitBlock(BodyBB);
9380 // Get the array size by multiplying element size and element number
9381 // (i.e., \p Size).
9382 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9383 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9384 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9385 // memory allocation/deletion purpose only.
9386 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9387 MapType,
9388 MapperCGF.Builder.getInt64(
9389 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9390 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9391 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9392 MapTypeArg = MapperCGF.Builder.CreateOr(
9393 MapTypeArg,
9394 MapperCGF.Builder.getInt64(
9395 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9396 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9397
9398 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9399 // data structure.
9400 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9401 ArraySize, MapTypeArg, MapName};
9402 MapperCGF.EmitRuntimeCall(
9403 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9404 OMPRTL___tgt_push_mapper_component),
9405 OffloadingArgs);
9406 }
9407
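/// Return the outlined mapper function for \p D, emitting it first if this is
/// the first request; results are cached in UDMMap.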
9408 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9409 const OMPDeclareMapperDecl *D) {
9410 auto I = UDMMap.find(D);
9411 if (I != UDMMap.end())
9412 return I->second;
9413 emitUserDefinedMapper(D);
9414 return UDMMap.lookup(D);
9415 }
9416
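/// Compute the trip count passed to the target kernel launch as a hint.
/// Returns the iteration count of the nested (teams) distribute loop when one
/// is found, and a constant 0 (meaning "unknown") otherwise.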
9417 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9418 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9419 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9420 const OMPLoopDirective &D)>
9421 SizeEmitter) {
9422 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9423 const OMPExecutableDirective *TD = &D;
9424 // Get the nested teams distribute kind directive, if any. For now, treat
9425 // 'target_teams_loop' as if it were really a 'target_teams_distribute'.
9426 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9427 Kind != OMPD_target_teams_loop)
9428 TD = getNestedDistributeDirective(CGM.getContext(), D);
9429 if (!TD)
9430 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9431
9432 const auto *LD = cast<OMPLoopDirective>(TD);
9433 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9434 return NumIterations;
9435 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9436 }
9437
9438 static void
9439 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9440 const OMPExecutableDirective &D,
9441 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9442 bool RequiresOuterTask, const CapturedStmt &CS,
9443 bool OffloadingMandatory, CodeGenFunction &CGF) {
9444 if (OffloadingMandatory) {
9445 CGF.Builder.CreateUnreachable();
9446 } else {
9447 if (RequiresOuterTask) {
9448 CapturedVars.clear();
9449 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9450 }
9451 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9452 CapturedVars);
9453 }
9454 }
9455
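/// Materialize the device number for the launch. Illustrative source forms:
/// \code
/// #pragma omp target device(2)   // explicit device_num expression
/// #pragma omp target             // no clause: OMP_DEVICEID_UNDEF is used
/// \endcode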
9456 static llvm::Value *emitDeviceID(
9457 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9458 CodeGenFunction &CGF) {
9459 // Emit device ID if any.
9460 llvm::Value *DeviceID;
9461 if (Device.getPointer()) {
9462 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9463 Device.getInt() == OMPC_DEVICE_device_num) &&
9464 "Expected device_num modifier.");
9465 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9466 DeviceID =
9467 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9468 } else {
9469 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9470 }
9471 return DeviceID;
9472 }
9473
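/// Evaluate the ompx_dyn_cgroup_mem clause, if present, to an i32 byte count
/// (0 when absent). Illustrative use, assuming the Clang extension spelling:
/// \code
/// #pragma omp target ompx_dyn_cgroup_mem(1024)
/// \endcode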
9474 llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9475 CodeGenFunction &CGF) {
9476 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9477
9478 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9479 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9480 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9481 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9482 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9483 /*isSigned=*/false);
9484 }
9485 return DynCGroupMem;
9486 }
9487
9488 static void emitTargetCallKernelLaunch(
9489 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9490 const OMPExecutableDirective &D,
9491 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9492 const CapturedStmt &CS, bool OffloadingMandatory,
9493 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9494 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9495 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9496 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9497 const OMPLoopDirective &D)>
9498 SizeEmitter,
9499 CodeGenFunction &CGF, CodeGenModule &CGM) {
9500 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9501
9502 // Fill up the arrays with all the captured variables.
9503 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9504
9505 // Get mappable expression information.
9506 MappableExprsHandler MEHandler(D, CGF);
9507 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9508 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9509
9510 auto RI = CS.getCapturedRecordDecl()->field_begin();
9511 auto *CV = CapturedVars.begin();
9512 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9513 CE = CS.capture_end();
9514 CI != CE; ++CI, ++RI, ++CV) {
9515 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9516 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9517
9518 // VLA sizes are passed to the outlined region by copy and do not have map
9519 // information associated.
9520 if (CI->capturesVariableArrayType()) {
9521 CurInfo.Exprs.push_back(nullptr);
9522 CurInfo.BasePointers.push_back(*CV);
9523 CurInfo.DevicePtrDecls.push_back(nullptr);
9524 CurInfo.DevicePointers.push_back(
9525 MappableExprsHandler::DeviceInfoTy::None);
9526 CurInfo.Pointers.push_back(*CV);
9527 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9528 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9529 // Copy to the device as an argument. No need to retrieve it.
9530 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9531 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9532 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9533 CurInfo.Mappers.push_back(nullptr);
9534 } else {
9535 // If we have any information in the map clause, we use it, otherwise we
9536 // just do a default mapping.
9537 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9538 if (!CI->capturesThis())
9539 MappedVarSet.insert(CI->getCapturedVar());
9540 else
9541 MappedVarSet.insert(nullptr);
9542 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9543 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9544 // Generate correct mapping for variables captured by reference in
9545 // lambdas.
9546 if (CI->capturesVariable())
9547 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9548 CurInfo, LambdaPointers);
9549 }
9550 // We expect to have at least an element of information for this capture.
9551 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9552 "Non-existing map pointer for capture!");
9553 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9554 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9555 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9556 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9557 "Inconsistent map information sizes!");
9558
9559 // If there is an entry in PartialStruct it means we have a struct with
9560 // individual members mapped. Emit an extra combined entry.
9561 if (PartialStruct.Base.isValid()) {
9562 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9563 MEHandler.emitCombinedEntry(
9564 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9565 OMPBuilder, nullptr,
9566 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9567 }
9568
9569 // We need to append the results of this capture to what we already have.
9570 CombinedInfo.append(CurInfo);
9571 }
9572 // Adjust MEMBER_OF flags for the lambdas captures.
9573 MEHandler.adjustMemberOfForLambdaCaptures(
9574 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9575 CombinedInfo.Pointers, CombinedInfo.Types);
9576 // Map any list items in a map clause that were not captured because they
9577 // weren't referenced within the construct.
9578 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9579
9580 CGOpenMPRuntime::TargetDataInfo Info;
9581 // Fill up the arrays and create the arguments.
9582 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9583 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9584 llvm::codegenoptions::NoDebugInfo;
9585 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9586 EmitDebug,
9587 /*ForEndCall=*/false);
9588
9589 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9590 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9591 CGF.VoidPtrTy, CGM.getPointerAlign());
9592 InputInfo.PointersArray =
9593 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9594 InputInfo.SizesArray =
9595 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9596 InputInfo.MappersArray =
9597 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9598 MapTypesArray = Info.RTArgs.MapTypesArray;
9599 MapNamesArray = Info.RTArgs.MapNamesArray;
9600
9601 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9602 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9603 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9604 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9605 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9606
9607 if (IsReverseOffloading) {
9608 // Reverse offloading is not supported, so just execute on the host.
9609 // FIXME: This fallback solution is incorrect since it ignores the
9610 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9611 // assert here and ensure SEMA emits an error.
9612 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9613 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9614 return;
9615 }
9616
9617 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9618 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9619
9620 llvm::Value *BasePointersArray =
9621 InputInfo.BasePointersArray.emitRawPointer(CGF);
9622 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9623 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9624 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9625
9626 auto &&EmitTargetCallFallbackCB =
9627 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9628 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9629 -> llvm::OpenMPIRBuilder::InsertPointTy {
9630 CGF.Builder.restoreIP(IP);
9631 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9632 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9633 return CGF.Builder.saveIP();
9634 };
9635
9636 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9637 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9638 llvm::Value *NumThreads =
9639 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9640 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9641 llvm::Value *NumIterations =
9642 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9643 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9644 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9645 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9646
9647 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9648 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9649 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9650
9651 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9652 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9653 DynCGGroupMem, HasNoWait);
9654
9655 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9656 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9657 DeviceID, RTLoc, AllocaIP));
9658 };
9659
9660 if (RequiresOuterTask)
9661 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9662 else
9663 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9664 }
9665
9666 static void
9667 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9668 const OMPExecutableDirective &D,
9669 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9670 bool RequiresOuterTask, const CapturedStmt &CS,
9671 bool OffloadingMandatory, CodeGenFunction &CGF) {
9672
9673 // Notify that the host version must be executed.
9674 auto &&ElseGen =
9675 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9676 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9677 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9678 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9679 };
9680
9681 if (RequiresOuterTask) {
9682 CodeGenFunction::OMPTargetDataInfo InputInfo;
9683 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9684 } else {
9685 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9686 }
9687 }
9688
9689 void CGOpenMPRuntime::emitTargetCall(
9690 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9691 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9692 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9693 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9694 const OMPLoopDirective &D)>
9695 SizeEmitter) {
9696 if (!CGF.HaveInsertPoint())
9697 return;
9698
9699 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9700 CGM.getLangOpts().OpenMPOffloadMandatory;
9701
9702 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9703
9704 const bool RequiresOuterTask =
9705 D.hasClausesOfKind<OMPDependClause>() ||
9706 D.hasClausesOfKind<OMPNowaitClause>() ||
9707 D.hasClausesOfKind<OMPInReductionClause>() ||
9708 (CGM.getLangOpts().OpenMP >= 51 &&
9709 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9710 D.hasClausesOfKind<OMPThreadLimitClause>());
9711 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9712 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9713 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9714 PrePostActionTy &) {
9715 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9716 };
9717 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9718
9719 CodeGenFunction::OMPTargetDataInfo InputInfo;
9720 llvm::Value *MapTypesArray = nullptr;
9721 llvm::Value *MapNamesArray = nullptr;
9722
9723 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9724 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9725 OutlinedFnID, &InputInfo, &MapTypesArray,
9726 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9727 PrePostActionTy &) {
9728 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9729 RequiresOuterTask, CS, OffloadingMandatory,
9730 Device, OutlinedFnID, InputInfo, MapTypesArray,
9731 MapNamesArray, SizeEmitter, CGF, CGM);
9732 };
9733
9734 auto &&TargetElseGen =
9735 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9736 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9737 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9738 CS, OffloadingMandatory, CGF);
9739 };
9740
9741 // If we have a target function ID it means that we need to support
9742 // offloading, otherwise, just execute on the host. We need to execute on the
9743 // host regardless of the conditional in the if clause if, e.g., the user does
9744 // not specify target triples.
9745 if (OutlinedFnID) {
9746 if (IfCond) {
9747 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9748 } else {
9749 RegionCodeGenTy ThenRCG(TargetThenGen);
9750 ThenRCG(CGF);
9751 }
9752 } else {
9753 RegionCodeGenTy ElseRCG(TargetElseGen);
9754 ElseRCG(CGF);
9755 }
9756 }
9757
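/// Recursively scan \p S for OpenMP target execution directives, emitting a
/// device entry point for each one found; lambda bodies and nested statements
/// are visited as well.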
9758 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9759 StringRef ParentName) {
9760 if (!S)
9761 return;
9762
9763 // Codegen OMP target directives that offload compute to the device.
9764 bool RequiresDeviceCodegen =
9765 isa<OMPExecutableDirective>(S) &&
9766 isOpenMPTargetExecutionDirective(
9767 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9768
9769 if (RequiresDeviceCodegen) {
9770 const auto &E = *cast<OMPExecutableDirective>(S);
9771
9772 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9773 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9774
9775 // Is this a target region that should not be emitted as an entry point? If
9776 // so just signal we are done with this target region.
9777 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9778 return;
9779
9780 switch (E.getDirectiveKind()) {
9781 case OMPD_target:
9782 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9783 cast<OMPTargetDirective>(E));
9784 break;
9785 case OMPD_target_parallel:
9786 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9787 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9788 break;
9789 case OMPD_target_teams:
9790 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9791 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9792 break;
9793 case OMPD_target_teams_distribute:
9794 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9795 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9796 break;
9797 case OMPD_target_teams_distribute_simd:
9798 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9799 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9800 break;
9801 case OMPD_target_parallel_for:
9802 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9803 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9804 break;
9805 case OMPD_target_parallel_for_simd:
9806 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9807 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9808 break;
9809 case OMPD_target_simd:
9810 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9811 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9812 break;
9813 case OMPD_target_teams_distribute_parallel_for:
9814 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9815 CGM, ParentName,
9816 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9817 break;
9818 case OMPD_target_teams_distribute_parallel_for_simd:
9819 CodeGenFunction::
9820 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9821 CGM, ParentName,
9822 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9823 break;
9824 case OMPD_target_teams_loop:
9825 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9826 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9827 break;
9828 case OMPD_target_parallel_loop:
9829 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9830 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9831 break;
9832 case OMPD_parallel:
9833 case OMPD_for:
9834 case OMPD_parallel_for:
9835 case OMPD_parallel_master:
9836 case OMPD_parallel_sections:
9837 case OMPD_for_simd:
9838 case OMPD_parallel_for_simd:
9839 case OMPD_cancel:
9840 case OMPD_cancellation_point:
9841 case OMPD_ordered:
9842 case OMPD_threadprivate:
9843 case OMPD_allocate:
9844 case OMPD_task:
9845 case OMPD_simd:
9846 case OMPD_tile:
9847 case OMPD_unroll:
9848 case OMPD_sections:
9849 case OMPD_section:
9850 case OMPD_single:
9851 case OMPD_master:
9852 case OMPD_critical:
9853 case OMPD_taskyield:
9854 case OMPD_barrier:
9855 case OMPD_taskwait:
9856 case OMPD_taskgroup:
9857 case OMPD_atomic:
9858 case OMPD_flush:
9859 case OMPD_depobj:
9860 case OMPD_scan:
9861 case OMPD_teams:
9862 case OMPD_target_data:
9863 case OMPD_target_exit_data:
9864 case OMPD_target_enter_data:
9865 case OMPD_distribute:
9866 case OMPD_distribute_simd:
9867 case OMPD_distribute_parallel_for:
9868 case OMPD_distribute_parallel_for_simd:
9869 case OMPD_teams_distribute:
9870 case OMPD_teams_distribute_simd:
9871 case OMPD_teams_distribute_parallel_for:
9872 case OMPD_teams_distribute_parallel_for_simd:
9873 case OMPD_target_update:
9874 case OMPD_declare_simd:
9875 case OMPD_declare_variant:
9876 case OMPD_begin_declare_variant:
9877 case OMPD_end_declare_variant:
9878 case OMPD_declare_target:
9879 case OMPD_end_declare_target:
9880 case OMPD_declare_reduction:
9881 case OMPD_declare_mapper:
9882 case OMPD_taskloop:
9883 case OMPD_taskloop_simd:
9884 case OMPD_master_taskloop:
9885 case OMPD_master_taskloop_simd:
9886 case OMPD_parallel_master_taskloop:
9887 case OMPD_parallel_master_taskloop_simd:
9888 case OMPD_requires:
9889 case OMPD_metadirective:
9890 case OMPD_unknown:
9891 default:
9892 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9893 }
9894 return;
9895 }
9896
9897 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9898 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9899 return;
9900
9901 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9902 return;
9903 }
9904
9905 // If this is a lambda function, look into its body.
9906 if (const auto *L = dyn_cast<LambdaExpr>(S))
9907 S = L->getBody();
9908
9909 // Keep looking for target regions recursively.
9910 for (const Stmt *II : S->children())
9911 scanForTargetRegionsFunctions(II, ParentName);
9912 }
9913
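/// Return true if \p VD is known not to be emitted for the current side of
/// the compilation because of an explicit device_type clause. For example
/// (illustrative):
/// \code
/// #pragma omp declare target to(foo) device_type(nohost)
/// \endcode
/// makes foo a device-only symbol, so a host compile skips it.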
9914 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9915 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9916 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9917 if (!DevTy)
9918 return false;
9919 // Do not emit device_type(nohost) functions for the host.
9920 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9921 return true;
9922 // Do not emit device_type(host) functions for the device.
9923 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9924 return true;
9925 return false;
9926 }
9927
9928 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9929 // If emitting code for the host, we do not process FD here. Instead we do
9930 // the normal code generation.
9931 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9932 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9933 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9934 CGM.getLangOpts().OpenMPIsTargetDevice))
9935 return true;
9936 return false;
9937 }
9938
9939 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9940 // Try to detect target regions in the function.
9941 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9942 StringRef Name = CGM.getMangledName(GD);
9943 scanForTargetRegionsFunctions(FD->getBody(), Name);
9944 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9945 CGM.getLangOpts().OpenMPIsTargetDevice))
9946 return true;
9947 }
9948
9949 // Do not emit the function if it is not marked as declare target.
9950 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9951 AlreadyEmittedTargetDecls.count(VD) == 0;
9952 }
9953
9954 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9955 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9956 CGM.getLangOpts().OpenMPIsTargetDevice))
9957 return true;
9958
9959 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9960 return false;
9961
9962 // Check if there are Ctors/Dtors in this declaration and look for target
9963 // regions in it. We use the complete variant to produce the kernel name
9964 // mangling.
9965 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9966 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9967 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9968 StringRef ParentName =
9969 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9970 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9971 }
9972 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9973 StringRef ParentName =
9974 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9975 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9976 }
9977 }
9978
9979 // Do not emit the variable if it is not marked as declare target.
9980 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9981 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9982 cast<VarDecl>(GD.getDecl()));
9983 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9984 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9985 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9986 HasRequiresUnifiedSharedMemory)) {
9987 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9988 return true;
9989 }
9990 return false;
9991 }
9992
9993 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9994 llvm::Constant *Addr) {
9995 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9996 !CGM.getLangOpts().OpenMPIsTargetDevice)
9997 return;
9998
9999 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10000 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10001
10002 // If this is an 'extern' declaration, we defer to the canonical definition and
10003 // do not emit an offloading entry.
10004 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10005 VD->hasExternalStorage())
10006 return;
10007
10008 if (!Res) {
10009 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10010 // Register non-target variables being emitted in device code (debug info
10011 // may cause this).
10012 StringRef VarName = CGM.getMangledName(VD);
10013 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10014 }
10015 return;
10016 }
10017
10018 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10019 auto LinkageForVariable = [&VD, this]() {
10020 return CGM.getLLVMLinkageVarDefinition(VD);
10021 };
10022
10023 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10024 OMPBuilder.registerTargetGlobalVariable(
10025 convertCaptureClause(VD), convertDeviceClause(VD),
10026 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10027 VD->isExternallyVisible(),
10028 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10029 VD->getCanonicalDecl()->getBeginLoc()),
10030 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10031 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10032 CGM.getTypes().ConvertTypeForMem(
10033 CGM.getContext().getPointerType(VD->getType())),
10034 Addr);
10035
10036 for (auto *ref : GeneratedRefs)
10037 CGM.addCompilerUsedGlobal(ref);
10038 }
10039
10040 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10041 if (isa<FunctionDecl>(GD.getDecl()) ||
10042 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10043 return emitTargetFunctions(GD);
10044
10045 return emitTargetGlobalVariable(GD);
10046 }
10047
10048 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10049 for (const VarDecl *VD : DeferredGlobalVariables) {
10050 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10051 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10052 if (!Res)
10053 continue;
10054 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10055 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10056 !HasRequiresUnifiedSharedMemory) {
10057 CGM.EmitGlobal(VD);
10058 } else {
10059 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10060 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10061 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10062 HasRequiresUnifiedSharedMemory)) &&
10063 "Expected link clause or to clause with unified memory.");
10064 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10065 }
10066 }
10067 }
10068
10069 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10070 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10071 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10072 " Expected target-based directive.");
10073 }
10074
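/// Record the effects of an OpenMP 'requires' directive on later codegen.
/// For example (illustrative):
/// \code
/// #pragma omp requires unified_shared_memory, atomic_default_mem_order(seq_cst)
/// \endcode
/// sets HasRequiresUnifiedSharedMemory and selects sequentially consistent as
/// the default atomic ordering.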
10075 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10076 for (const OMPClause *Clause : D->clauselists()) {
10077 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10078 HasRequiresUnifiedSharedMemory = true;
10079 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10080 } else if (const auto *AC =
10081 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10082 switch (AC->getAtomicDefaultMemOrderKind()) {
10083 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10084 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10085 break;
10086 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10087 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10088 break;
10089 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10090 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10091 break;
10092 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10093 break;
10094 }
10095 }
10096 }
10097 }
10098
10099 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10100 return RequiresAtomicOrdering;
10101 }
10102
10103 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10104 LangAS &AS) {
10105 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10106 return false;
10107 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10108 switch (A->getAllocatorType()) {
10109 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10110 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10111 // Not supported, fallback to the default mem space.
10112 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10113 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10114 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10115 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10116 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10117 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10118 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10119 AS = LangAS::Default;
10120 return true;
10121 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10122 llvm_unreachable("Expected predefined allocator for the variables with the "
10123 "static storage.");
10124 }
10125 return false;
10126 }
10127
10128 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10129 return HasRequiresUnifiedSharedMemory;
10130 }
10131
10132 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10133 CodeGenModule &CGM)
10134 : CGM(CGM) {
10135 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10136 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10137 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10138 }
10139 }
10140
10141 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10142 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10143 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10144 }
10145
10146 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10147 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10148 return true;
10149
10150 const auto *D = cast<FunctionDecl>(GD.getDecl());
10151 // Do not emit the function if it is marked as declare target, as it was
10152 // already emitted.
10153 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10154 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10155 if (auto *F = dyn_cast_or_null<llvm::Function>(
10156 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10157 return !F->isDeclaration();
10158 return false;
10159 }
10160 return true;
10161 }
10162
10163 return !AlreadyEmittedTargetDecls.insert(D).second;
10164 }
10165
10166 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10167 const OMPExecutableDirective &D,
10168 SourceLocation Loc,
10169 llvm::Function *OutlinedFn,
10170 ArrayRef<llvm::Value *> CapturedVars) {
10171 if (!CGF.HaveInsertPoint())
10172 return;
10173
10174 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10175 CodeGenFunction::RunCleanupsScope Scope(CGF);
10176
10177 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10178 llvm::Value *Args[] = {
10179 RTLoc,
10180 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10181 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10182 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10183 RealArgs.append(std::begin(Args), std::end(Args));
10184 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10185
10186 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10187 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10188 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10189 }
10190
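/// Emit a call to __kmpc_push_num_teams ahead of a teams launch. A host form
/// that reaches this path (illustrative):
/// \code
/// #pragma omp teams num_teams(4) thread_limit(64)
/// \endcode
/// An absent clause is encoded as 0, which lets the runtime choose a value.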
10191 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10192 const Expr *NumTeams,
10193 const Expr *ThreadLimit,
10194 SourceLocation Loc) {
10195 if (!CGF.HaveInsertPoint())
10196 return;
10197
10198 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10199
10200 llvm::Value *NumTeamsVal =
10201 NumTeams
10202 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10203 CGF.CGM.Int32Ty, /* isSigned = */ true)
10204 : CGF.Builder.getInt32(0);
10205
10206 llvm::Value *ThreadLimitVal =
10207 ThreadLimit
10208 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10209 CGF.CGM.Int32Ty, /* isSigned = */ true)
10210 : CGF.Builder.getInt32(0);
10211
10212 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10213 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10214 ThreadLimitVal};
10215 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10216 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10217 PushNumTeamsArgs);
10218 }
10219
10220 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10221 const Expr *ThreadLimit,
10222 SourceLocation Loc) {
10223 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10224 llvm::Value *ThreadLimitVal =
10225 ThreadLimit
10226 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10227 CGF.CGM.Int32Ty, /* isSigned = */ true)
10228 : CGF.Builder.getInt32(0);
10229
10230 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10231 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10232 ThreadLimitVal};
10233 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10234 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10235 ThreadLimitArgs);
10236 }
10237
10238 void CGOpenMPRuntime::emitTargetDataCalls(
10239 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10240 const Expr *Device, const RegionCodeGenTy &CodeGen,
10241 CGOpenMPRuntime::TargetDataInfo &Info) {
10242 if (!CGF.HaveInsertPoint())
10243 return;
10244
10245 // Action used to replace the default codegen action and turn privatization
10246 // off.
10247 PrePostActionTy NoPrivAction;
10248
10249 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10250
10251 llvm::Value *IfCondVal = nullptr;
10252 if (IfCond)
10253 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10254
10255 // Emit device ID if any.
10256 llvm::Value *DeviceID = nullptr;
10257 if (Device) {
10258 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10259 CGF.Int64Ty, /*isSigned=*/true);
10260 } else {
10261 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10262 }
10263
10264 // Fill up the arrays with all the mapped variables.
10265 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10266 auto GenMapInfoCB =
10267 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10268 CGF.Builder.restoreIP(CodeGenIP);
10269 // Get map clause information.
10270 MappableExprsHandler MEHandler(D, CGF);
10271 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10272
10273 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10274 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10275 };
10276 if (CGM.getCodeGenOpts().getDebugInfo() !=
10277 llvm::codegenoptions::NoDebugInfo) {
10278 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10279 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10280 FillInfoMap);
10281 }
10282
10283 return CombinedInfo;
10284 };
10285 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // The CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is passed by value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
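  // For example (illustrative only): with CDT = double (64 bits) and a
  // 256-bit AVX2 vector register, VLEN = 256 / 64 = 4.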
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
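/// For example (illustrative only): a uniform parameter followed by a vector
/// parameter and a linear parameter with step 2 mangles as "uvl2".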
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
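  // The attributes added below follow the mangling scheme
  // "_ZGV<isa><mask><vlen><parameters>_<name>". For example (illustrative
  // only), the notinbranch AVX2 variant of "foo(double)" with VLEN 4 is
  // recorded as "_ZGVdN4v_foo".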
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
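// For example (illustrative only): an Advanced SIMD, not-inbranch, 2-lane
// variant of "foo(double)" is named "_ZGVnN2v_foo", while the corresponding
// SVE variant uses "x" as the VLEN token: "_ZGVsMxv_foo".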

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
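// For example (illustrative only): with NDS = 32, both a 2-lane (64-bit
// data path) and a 4-lane (128-bit) variant are emitted.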
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: the SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
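        // For example (illustrative only): linear(p:2) on an 'int *'
        // parameter is encoded with step 2 * sizeof(int) = 8.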
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty =
      C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
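  // Depending on the doacross modifier this lowers to (a sketch):
  //   __kmpc_doacross_post(loc, gtid, vec)  for 'source'
  //   __kmpc_doacross_wait(loc, gtid, vec)  for 'sink'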
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (the
/// default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
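    // A sketch of the resulting call (illustrative only):
    //   void *addr = __kmpc_alloc(gtid, size, allocator);
    // or, when an alignment is requested:
    //   void *addr = __kmpc_aligned_alloc(gtid, align, size, allocator);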
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

11674 namespace {
11675 /// Checks if the lastprivate conditional variable is referenced in LHS.
11676 class LastprivateConditionalRefChecker final
11677 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11678 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11679 const Expr *FoundE = nullptr;
11680 const Decl *FoundD = nullptr;
11681 StringRef UniqueDeclName;
11682 LValue IVLVal;
11683 llvm::Function *FoundFn = nullptr;
11684 SourceLocation Loc;
11685
11686 public:
VisitDeclRefExpr(const DeclRefExpr * E)11687 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11688 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11689 llvm::reverse(LPM)) {
11690 auto It = D.DeclToUniqueName.find(E->getDecl());
11691 if (It == D.DeclToUniqueName.end())
11692 continue;
11693 if (D.Disabled)
11694 return false;
11695 FoundE = E;
11696 FoundD = E->getDecl()->getCanonicalDecl();
11697 UniqueDeclName = It->second;
11698 IVLVal = D.IVLVal;
11699 FoundFn = D.Fn;
11700 break;
11701 }
11702 return FoundE == E;
11703 }
VisitMemberExpr(const MemberExpr * E)11704 bool VisitMemberExpr(const MemberExpr *E) {
11705 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11706 return false;
11707 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11708 llvm::reverse(LPM)) {
11709 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11710 if (It == D.DeclToUniqueName.end())
11711 continue;
11712 if (D.Disabled)
11713 return false;
11714 FoundE = E;
11715 FoundD = E->getMemberDecl()->getCanonicalDecl();
11716 UniqueDeclName = It->second;
11717 IVLVal = D.IVLVal;
11718 FoundFn = D.Fn;
11719 break;
11720 }
11721 return FoundE == E;
11722 }
VisitStmt(const Stmt * S)11723 bool VisitStmt(const Stmt *S) {
11724 for (const Stmt *Child : S->children()) {
11725 if (!Child)
11726 continue;
11727 if (const auto *E = dyn_cast<Expr>(Child))
11728 if (!E->isGLValue())
11729 continue;
11730 if (Visit(Child))
11731 return true;
11732 }
11733 return false;
11734 }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11735 explicit LastprivateConditionalRefChecker(
11736 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11737 : LPM(LPM) {}
11738 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11739 getFoundData() const {
11740 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11741 }
11742 };
11743 } // namespace
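
// An example of the pattern the checker above detects (a hypothetical
// fragment; OpenMP 5.0 or later is assumed):
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (test(i))
//       a = i; // 'a' appears on the LHS, so a conditional update of the
//              // global copy of 'a' must be emitted for this store.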

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // Check whether the variable needs updating (last_iv <= iv) and, if so,
    // store the new value in the global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit a critical region; no parallel region can be emitted in
    // SIMD-only mode.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
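
// Note that the guard above uses '<=' rather than '<': several stores to
// the variable may occur within the same logical iteration, and each later
// store must still win. A hypothetical illustration:
//
//   for (int i = 0; i < N; ++i) { // iv == i
//     a = f(i); // fires: last_iv = i, last_a = priv_a
//     a = g(i); // same iv; '<=' lets this later store update last_a too
//   }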

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
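
// A sketch of the situation handled above (hypothetical): the store to the
// lastprivate conditional happens inside a nested region that is outlined
// into its own function, so the inner region merely raises the Fired flag,
// and the enclosing function checks that flag here and performs the actual
// update:
//
//   #pragma omp for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i) {
//     #pragma omp parallel // outlined; stores to 'a' only set priv_a.Fired
//     a = f(i);
//   }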

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
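
// In effect (a sketch): if the internal global holding the last value of
// the variable exists, emit "a = last_a;" to copy it back at the end of
// the construct; if no store ever fired, the global was never created and
// the original variable is left untouched.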

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}