xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "ABIInfoImpl.h"
15 #include "CGCXXABI.h"
16 #include "CGCleanup.h"
17 #include "CGDebugInfo.h"
18 #include "CGRecordLayout.h"
19 #include "CodeGenFunction.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/APValue.h"
22 #include "clang/AST/Attr.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OpenMPClause.h"
25 #include "clang/AST/StmtOpenMP.h"
26 #include "clang/AST/StmtVisitor.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <cstdint>
43 #include <numeric>
44 #include <optional>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Carries the region kind, the codegen callback used to emit the region
/// body, the directive kind, and whether the region may be cancelled.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that capture a statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement (e.g. inlined
  /// regions that delegate to an outer region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by task regions and forwarded by inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// Support for LLVM-style isa/dyn_cast: any CR_OpenMP captured-stmt info
  /// is a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
112 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'parallel'-style directives whose body is outlined
/// into a helper function named \p HelperName.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function.
  StringRef HelperName;
};
145 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'task' directives; additionally supports the
/// re-entry ("switching") machinery required for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch switch for untied tasks.
  /// An untied task may be suspended and resumed; the current part id is
  /// stored through \p PartIDVar and a switch over it jumps to the matching
  /// resume point on re-entry.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unknown part ids fall through to the done block and leave the
        // task body.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspension point: store the next part id, run the
    /// user-provided untied codegen, exit via the return block, and
    /// register the resume block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next resume point gets the next unused case number.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info when one exists; otherwise the request is either answered with a
/// neutral default or is a hard error.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-stmt info that was active before this inlined region
  /// was entered; restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-supplied unique name of the target region helper.
  StringRef HelperName;
};
346 
/// Placeholder RegionCodeGenTy callback for expression-only regions, which
/// must never emit a statement body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region. Privatizes any global variables captured by the statement so
/// that expression emission sees local copies; has no body, thread id, or
/// helper name of its own.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already private; only globals need help.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the active CapturedStmtInfo on
/// construction and restores the previous one on destruction; optionally
/// suppresses inheritance of lambda/block capture state while active.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, restored in the destructor when
  // NoInheritance is set.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, the inlined region does not see the
  /// enclosing lambda/block capture fields while it is active.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the current capture state and clear it for the region.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// (values must stay in sync with the runtime's KMP_IDENT_* constants).
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t struct; order must match the layout above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
521 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h; values must match the runtime).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
553 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region. Runs the action's Exit hook as an EH-scope cleanup so it fires
/// on both normal and exceptional exit from the region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the current block is already terminated.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
567 
568 } // anonymous namespace
569 
operator ()(CodeGenFunction & CGF) const570 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
571   CodeGenFunction::RunCleanupsScope Scope(CGF);
572   if (PrePostAction) {
573     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
574     Callback(CodeGen, CGF, *PrePostAction);
575   } else {
576     PrePostActionTy Action;
577     Callback(CodeGen, CGF, Action);
578   }
579 }
580 
581 /// Check if the combiner is a call to UDR combiner and if it is so return the
582 /// UDR decl used for reduction.
583 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)584 getReductionInit(const Expr *ReductionOp) {
585   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
586     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
587       if (const auto *DRE =
588               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
589         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
590           return DRD;
591   return nullptr;
592 }
593 
/// Emit initialization of \p Private from \p Original for a reduction.
/// If the user-defined reduction \p DRD has an initializer clause, the
/// initializer call \p InitOp is emitted with the UDR's priv/orig variables
/// privatized to \p Private / \p Original; otherwise \p Private is
/// zero-initialized from a private global holding the null constant of
/// \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // {combiner, initializer} pair for the UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's formal LHS/RHS variables onto the private and
    // original storage, then emit the call with the initializer function
    // bound to the opaque callee.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: default-initialize from a null constant.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly via an lvalue mapping; no RValue.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
647 
/// Emit element-by-element initialization of an array of complex types.
/// \param CGF Code generation state.
/// \param DestAddr Address of the array to initialize.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
///        the user-defined 'declare reduction' initializer (with \p DRD);
///        otherwise \p Init is emitted as a plain initializer expression.
/// \param Init Initial expression for each element.
/// \param DRD 'declare reduction' declaration, if any; when present the
///        matching source element address is threaded through the loop.
/// \param SrcAddr Address of the original array (only used with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; only needed when a declare-reduction
  // initializer reads the original element.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy. The cleanup scope guarantees temporaries created for one
  // element are destroyed before the next iteration.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge incoming value must come from the block the branch was
  // emitted in (may differ from BodyBB after nested emission).
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
736 
emitSharedLValue(CodeGenFunction & CGF,const Expr * E)737 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
738   return CGF.EmitOMPSharedLValue(E);
739 }
740 
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)741 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
742                                             const Expr *E) {
743   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
744     return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
745   return LValue();
746 }
747 
emitAggregateInitialization(CodeGenFunction & CGF,unsigned N,Address PrivateAddr,Address SharedAddr,const OMPDeclareReductionDecl * DRD)748 void ReductionCodeGen::emitAggregateInitialization(
749     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
750     const OMPDeclareReductionDecl *DRD) {
751   // Emit VarDecl with copy init for arrays.
752   // Get the address of the original variable captured in current
753   // captured region.
754   const auto *PrivateVD =
755       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
756   bool EmitDeclareReductionInit =
757       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
758   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
759                        EmitDeclareReductionInit,
760                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
761                                                 : PrivateVD->getInit(),
762                        DRD, SharedAddr);
763 }
764 
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)765 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
766                                    ArrayRef<const Expr *> Origs,
767                                    ArrayRef<const Expr *> Privates,
768                                    ArrayRef<const Expr *> ReductionOps) {
769   ClausesData.reserve(Shareds.size());
770   SharedAddresses.reserve(Shareds.size());
771   Sizes.reserve(Shareds.size());
772   BaseDecls.reserve(Shareds.size());
773   const auto *IOrig = Origs.begin();
774   const auto *IPriv = Privates.begin();
775   const auto *IRed = ReductionOps.begin();
776   for (const Expr *Ref : Shareds) {
777     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
778     std::advance(IOrig, 1);
779     std::advance(IPriv, 1);
780     std::advance(IRed, 1);
781   }
782 }
783 
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)784 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
785   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
786          "Number of generated lvalues must be exactly N.");
787   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
788   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
789   SharedAddresses.emplace_back(First, Second);
790   if (ClausesData[N].Shared == ClausesData[N].Ref) {
791     OrigAddresses.emplace_back(First, Second);
792   } else {
793     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
794     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
795     OrigAddresses.emplace_back(First, Second);
796   }
797 }
798 
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, and emit its variably-modified private type if needed.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the char size is statically computable and no
    // element count is recorded.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1, computed from the two
    // recorded lvalues; char size follows by multiplying with sizeof(elem).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: the char size is known; divide (exactly) by sizeof(elem) to
    // recover the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted below.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
832 
/// Emit the variably-modified private type of reduction item \p N using an
/// externally provided element count \p Size. \p Size must be null for
/// non-variably-modified items (nothing is emitted for those).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to the provided count and emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
849 
/// Emit initialization of the private copy of reduction item \p N.
/// \param DefaultInit Callback performing the default initialization; it is
///        always invoked for its side effects on the paths below, and its
///        boolean result suppresses the fallback initializer emission.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed item: initialize element by element. Run DefaultInit first
    // (result ignored) when a user-defined initializer will be applied.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a declare-reduction initializer (or no private init).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer unless DefaultInit
    // handled it (returned true) or the initializer is trivial.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
874 
needCleanups(unsigned N)875 bool ReductionCodeGen::needCleanups(unsigned N) {
876   QualType PrivateType = getPrivateType(N);
877   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
878   return DTorKind != QualType::DK_none;
879 }
880 
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)881 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
882                                     Address PrivateAddr) {
883   QualType PrivateType = getPrivateType(N);
884   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
885   if (needCleanups(N)) {
886     PrivateAddr =
887         PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
888     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
889   }
890 }
891 
/// Load through the pointer/reference levels of \p BaseTy, starting from
/// \p BaseLV, until the type matches \p ElTy, and return the resulting
/// address retyped to \p ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one indirection level per iteration, stopping once BaseTy equals
  // the element type (or is no longer a pointer/reference).
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: reinterpret the current address as a reference and
      // load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Retype the final address, preserving base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
910 
/// Rebuild the indirection chain of \p BaseTy around the adjusted pointer
/// \p Addr: one memory temporary is created per pointer/reference level, each
/// outer temporary storing the address of the next inner one, with \p Addr
/// stored at the innermost level. The outermost temporary is returned so it
/// can be used exactly like the original base address. If \p BaseTy carries
/// no extra indirection, \p Addr is cast to the type of
/// \p OriginalBaseAddress and returned in its place.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // One temporary per indirection level; chain them together as we go.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and return the
    // outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection: reuse the original base with the adjusted pointer.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
939 
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)940 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
941   const VarDecl *OrigVD = nullptr;
942   if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
943     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
944     while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
945       Base = TempOASE->getBase()->IgnoreParenImpCasts();
946     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
947       Base = TempASE->getBase()->IgnoreParenImpCasts();
948     DE = cast<DeclRefExpr>(Base);
949     OrigVD = cast<VarDecl>(DE->getDecl());
950   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
951     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
952     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
953       Base = TempASE->getBase()->IgnoreParenImpCasts();
954     DE = cast<DeclRefExpr>(Base);
955     OrigVD = cast<VarDecl>(DE->getDecl());
956   }
957   return OrigVD;
958 }
959 
/// Adjust \p PrivateAddr of an array-section/subscript reduction item so it
/// can be addressed like the base variable: the private pointer is shifted by
/// the (negative) offset of the section within the original variable, and
/// re-wrapped with the base variable's indirection structure via castToBase.
/// Plain variable references are returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through the base variable's indirections down to the element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Offset (in elements) from the section start back to the base pointer.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  // Plain variable reference: no adjustment necessary.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
986 
usesReductionInitializer(unsigned N) const987 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
988   const OMPDeclareReductionDecl *DRD =
989       getReductionInit(ClausesData[N].ReductionOp);
990   return DRD && DRD->getInitializer();
991 }
992 
getThreadIDVariableLValue(CodeGenFunction & CGF)993 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
994   return CGF.EmitLoadOfPointerLValue(
995       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
996       getThreadIDVariable()->getType()->castAs<PointerType>());
997 }
998 
/// Emit the body of an OpenMP region, wrapped in a terminate scope.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce this by bracketing the body with a terminate scope: an exception
  // escaping the region terminates instead of unwinding out.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1013 
getThreadIDVariableLValue(CodeGenFunction & CGF)1014 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1015     CodeGenFunction &CGF) {
1016   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1017                             getThreadIDVariable()->getType(),
1018                             AlignmentSource::Decl);
1019 }
1020 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1021 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1022                                        QualType FieldTy) {
1023   auto *Field = FieldDecl::Create(
1024       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1025       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1026       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1027   Field->setAccess(AS_public);
1028   DC->addDecl(Field);
1029   return Field;
1030 }
1031 
/// Construct the OpenMP runtime helper and configure the OpenMPIRBuilder.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name: an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  // Seed the builder config from the language options; the 'requires'
  // reverse-offload/unified-address/dynamic-allocator flags start out false.
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  // On the device side, load the offload metadata recorded in the host IR
  // file; on the host, pass an empty file name.
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1053 
clear()1054 void CGOpenMPRuntime::clear() {
1055   InternalVars.clear();
1056   // Clean non-target variable declarations possibly used only in debug info.
1057   for (const auto &Data : EmittedNonTargetVariables) {
1058     if (!Data.getValue().pointsToAliveValue())
1059       continue;
1060     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1061     if (!GV)
1062       continue;
1063     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1064       continue;
1065     GV->eraseFromParent();
1066   }
1067 }
1068 
getName(ArrayRef<StringRef> Parts) const1069 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1070   return OMPBuilder.createPlatformSpecificName(Parts);
1071 }
1072 
/// Emit the outlined function for an 'omp declare reduction' combiner or
/// initializer.
/// \param CombinerInitializer Combiner expression, or the initializer
///        expression for call-style initializers; may be null.
/// \param In Variable to bind to the dereferenced first pointee ("in"/orig).
/// \param Out Variable to bind to the dereferenced second pointee
///        ("out"/priv); for non-combiners its own initializer is also
///        emitted when present.
/// \param IsCombiner True when emitting the combiner, false for the
///        initializer.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Signature: void .omp_combiner.(Ty *omp_out, Ty *omp_in) — note that the
  // 'out' parameter is pushed first below.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Under optimization, allow/force inlining of the helper.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // For initializers, emit the 'out' variable's own (non-trivial) initializer
  // before the optional call-style initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1127 
emitUserDefinedReduction(CodeGenFunction * CGF,const OMPDeclareReductionDecl * D)1128 void CGOpenMPRuntime::emitUserDefinedReduction(
1129     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1130   if (UDRMap.count(D) > 0)
1131     return;
1132   llvm::Function *Combiner = emitCombinerOrInitializer(
1133       CGM, D->getType(), D->getCombiner(),
1134       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1135       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1136       /*IsCombiner=*/true);
1137   llvm::Function *Initializer = nullptr;
1138   if (const Expr *Init = D->getInitializer()) {
1139     Initializer = emitCombinerOrInitializer(
1140         CGM, D->getType(),
1141         D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
1142                                                                      : nullptr,
1143         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1144         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1145         /*IsCombiner=*/false);
1146   }
1147   UDRMap.try_emplace(D, Combiner, Initializer);
1148   if (CGF)
1149     FunctionUDRMap[CGF->CurFn].push_back(D);
1150 }
1151 
1152 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1153 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1154   auto I = UDRMap.find(D);
1155   if (I != UDRMap.end())
1156     return I->second;
1157   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1158   return UDRMap.lookup(D);
1159 }
1160 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present. Pushes a FinalizationInfo in the constructor and
// pops it in the destructor; a null builder makes both operations no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Restore the builder at IP and branch through Clang's cleanups to the
      // parallel cancel destination.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1206 
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1207 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1208     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1209     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1210     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1211   assert(ThreadIDVar->getType()->isPointerType() &&
1212          "thread id variable must be of type kmp_int32 *");
1213   CodeGenFunction CGF(CGM, true);
1214   bool HasCancel = false;
1215   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1216     HasCancel = OPD->hasCancel();
1217   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1218     HasCancel = OPD->hasCancel();
1219   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1220     HasCancel = OPSD->hasCancel();
1221   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1222     HasCancel = OPFD->hasCancel();
1223   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1224     HasCancel = OPFD->hasCancel();
1225   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1226     HasCancel = OPFD->hasCancel();
1227   else if (const auto *OPFD =
1228                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1229     HasCancel = OPFD->hasCancel();
1230   else if (const auto *OPFD =
1231                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1232     HasCancel = OPFD->hasCancel();
1233 
1234   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1235   //       parallel region to make cancellation barriers work properly.
1236   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1237   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1238   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1239                                     HasCancel, OutlinedHelperName);
1240   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1241   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1242 }
1243 
getOutlinedHelperName(StringRef Name) const1244 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1245   std::string Suffix = getName({"omp_outlined"});
1246   return (Name + Suffix).str();
1247 }
1248 
getOutlinedHelperName(CodeGenFunction & CGF) const1249 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1250   return getOutlinedHelperName(CGF.CurFn->getName());
1251 }
1252 
getReductionFuncName(StringRef Name) const1253 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1254   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1255   return (Name + Suffix).str();
1256 }
1257 
emitParallelOutlinedFunction(CodeGenFunction & CGF,const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1258 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1259     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1260     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1261     const RegionCodeGenTy &CodeGen) {
1262   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1263   return emitParallelOrTeamsOutlinedFunction(
1264       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1265       CodeGen);
1266 }
1267 
emitTeamsOutlinedFunction(CodeGenFunction & CGF,const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1268 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1269     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1270     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271     const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1275       CodeGen);
1276 }
1277 
/// Outline the captured statement of a task/taskloop directive.
/// \param Tied Whether the task is tied; untied tasks get extra re-schedule
///        codegen (see UntiedCodeGen below) and report their part count via
///        \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task by calling __kmpc_omp_task with the
  // task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  // The action wires the untied codegen into the region; it must outlive the
  // GenerateCapturedStmtFunction call below.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Only untied tasks split into multiple parts.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1324 
setLocThreadIdInsertPt(CodeGenFunction & CGF,bool AtCurrentPoint)1325 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1326                                              bool AtCurrentPoint) {
1327   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1328   assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1329 
1330   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1331   if (AtCurrentPoint) {
1332     Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1333                                                  CGF.Builder.GetInsertBlock());
1334   } else {
1335     Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1336     Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1337   }
1338 }
1339 
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1340 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1341   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1342   if (Elem.ServiceInsertPt) {
1343     llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1344     Elem.ServiceInsertPt = nullptr;
1345     Ptr->eraseFromParent();
1346   }
1347 }
1348 
getIdentStringFromSourceLocation(CodeGenFunction & CGF,SourceLocation Loc,SmallString<128> & Buffer)1349 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1350                                                   SourceLocation Loc,
1351                                                   SmallString<128> &Buffer) {
1352   llvm::raw_svector_ostream OS(Buffer);
1353   // Build debug location
1354   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1355   OS << ";";
1356   if (auto *DbgInfo = CGF.getDebugInfo())
1357     OS << DbgInfo->remapDIPath(PLoc.getFilename());
1358   else
1359     OS << PLoc.getFilename();
1360   OS << ";";
1361   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1362     OS << FD->getQualifiedNameAsString();
1363   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1364   return OS.str();
1365 }
1366 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,unsigned Flags,bool EmitLoc)1367 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1368                                                  SourceLocation Loc,
1369                                                  unsigned Flags, bool EmitLoc) {
1370   uint32_t SrcLocStrSize;
1371   llvm::Constant *SrcLocStr;
1372   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1373                        llvm::codegenoptions::NoDebugInfo) ||
1374       Loc.isInvalid()) {
1375     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1376   } else {
1377     std::string FunctionName;
1378     std::string FileName;
1379     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1380       FunctionName = FD->getQualifiedNameAsString();
1381     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1382     if (auto *DbgInfo = CGF.getDebugInfo())
1383       FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1384     else
1385       FileName = PLoc.getFilename();
1386     unsigned Line = PLoc.getLine();
1387     unsigned Column = PLoc.getColumn();
1388     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1389                                                 Column, SrcLocStrSize);
1390   }
1391   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1392   return OMPBuilder.getOrCreateIdent(
1393       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1394 }
1395 
/// Returns the OpenMP global thread id (gtid) value for the current function,
/// caching it per function where possible.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // NOTE(review): the condition presumably guards against loading through
      // a pointer that would not dominate the use when C++ EH landing pads
      // are possible — confirm against the original change that added it.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point (entry block) and restore the
  // builder's position afterwards via the guard.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1463 
functionFinished(CodeGenFunction & CGF)1464 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1465   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1466   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1467     clearLocThreadIdInsertPt(CGF);
1468     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1469   }
1470   if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1471     for (const auto *D : I->second)
1472       UDRMap.erase(D);
1473     FunctionUDRMap.erase(I);
1474   }
1475   if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1476     for (const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
/// Returns the LLVM type of a pointer to the OpenMP ident_t structure, as
/// maintained by the underlying OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487 
1488 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl * VD)1489 convertDeviceClause(const VarDecl *VD) {
1490   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1491       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1492   if (!DevTy)
1493     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1494 
1495   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1496   case OMPDeclareTargetDeclAttr::DT_Host:
1497     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1498     break;
1499   case OMPDeclareTargetDeclAttr::DT_NoHost:
1500     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1501     break;
1502   case OMPDeclareTargetDeclAttr::DT_Any:
1503     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1504     break;
1505   default:
1506     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1507     break;
1508   }
1509 }
1510 
1511 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl * VD)1512 convertCaptureClause(const VarDecl *VD) {
1513   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1514       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1515   if (!MapType)
1516     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1517   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1518   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1519     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1520     break;
1521   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1522     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1523     break;
1524   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1525     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1526     break;
1527   default:
1528     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1529     break;
1530   }
1531 }
1532 
getEntryInfoFromPresumedLoc(CodeGenModule & CGM,llvm::OpenMPIRBuilder & OMPBuilder,SourceLocation BeginLoc,llvm::StringRef ParentName="")1533 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1534     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1535     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1536 
1537   auto FileInfoCallBack = [&]() {
1538     SourceManager &SM = CGM.getContext().getSourceManager();
1539     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1540 
1541     llvm::sys::fs::UniqueID ID;
1542     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1543       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1544     }
1545 
1546     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1547   };
1548 
1549   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1550 }
1551 
getAddrOfDeclareTargetVar(const VarDecl * VD)1552 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1553   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1554 
1555   auto LinkageForVariable = [&VD, this]() {
1556     return CGM.getLLVMLinkageVarDefinition(VD);
1557   };
1558 
1559   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1560 
1561   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1562       CGM.getContext().getPointerType(VD->getType()));
1563   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1564       convertCaptureClause(VD), convertDeviceClause(VD),
1565       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1566       VD->isExternallyVisible(),
1567       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1568                                   VD->getCanonicalDecl()->getBeginLoc()),
1569       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1570       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1571       LinkageForVariable);
1572 
1573   if (!addr)
1574     return ConstantAddress::invalid();
1575   return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1576 }
1577 
1578 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1579 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1580   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1581          !CGM.getContext().getTargetInfo().isTLSSupported());
1582   // Lookup the entry, lazily creating it if necessary.
1583   std::string Suffix = getName({"cache", ""});
1584   return OMPBuilder.getOrCreateInternalVariable(
1585       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1586 }
1587 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1588 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1589                                                 const VarDecl *VD,
1590                                                 Address VDAddr,
1591                                                 SourceLocation Loc) {
1592   if (CGM.getLangOpts().OpenMPUseTLS &&
1593       CGM.getContext().getTargetInfo().isTLSSupported())
1594     return VDAddr;
1595 
1596   llvm::Type *VarTy = VDAddr.getElementType();
1597   llvm::Value *Args[] = {
1598       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1599       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1600       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1601       getOrCreateThreadPrivateCache(VD)};
1602   return Address(
1603       CGF.EmitRuntimeCall(
1604           OMPBuilder.getOrCreateRuntimeFunction(
1605               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1606           Args),
1607       CGF.Int8Ty, VDAddr.getAlignment());
1608 }
1609 
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1610 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1611     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1612     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1613   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1614   // library.
1615   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1616   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1617                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1618                       OMPLoc);
1619   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1620   // to register constructor/destructor for variable.
1621   llvm::Value *Args[] = {
1622       OMPLoc,
1623       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1624       Ctor, CopyCtor, Dtor};
1625   CGF.EmitRuntimeCall(
1626       OMPBuilder.getOrCreateRuntimeFunction(
1627           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628       Args);
1629 }
1630 
/// Emits, if needed, the helper functions (constructor/destructor wrappers
/// and their registration) for the threadprivate variable \p VD.  Returns
/// the standalone init function when one is emitted (i.e. when no \p CGF is
/// supplied), nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when the variable is lowered to native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit the helpers once per variable, and only for its definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: the address of this thread's copy (void *).
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      // Run the variable's initializer into the destination copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: the address of this thread's copy (void *).
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated global
      // initializer function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1740 
/// Registers an indirectly-callable declare-target function \p FD (emitted as
/// \p GV) with the offload-entries manager so the runtime can resolve its
/// device address by name.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // On the device, emit a protected, constant pointer global initialized
    // with the function's address.
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  // Register the address (host function or device pointer global) under the
  // mangled entry name as an indirect global-var entry.
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}
1775 
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)1776 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1777                                                           QualType VarType,
1778                                                           StringRef Name) {
1779   std::string Suffix = getName({"artificial", ""});
1780   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1781   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1782       VarLVType, Twine(Name).concat(Suffix).str());
1783   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1784       CGM.getTarget().isTLSSupported()) {
1785     GAddr->setThreadLocal(/*Val=*/true);
1786     return Address(GAddr, GAddr->getValueType(),
1787                    CGM.getContext().getTypeAlignInChars(VarType));
1788   }
1789   std::string CacheSuffix = getName({"cache", ""});
1790   llvm::Value *Args[] = {
1791       emitUpdateLocation(CGF, SourceLocation()),
1792       getThreadID(CGF, SourceLocation()),
1793       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1794       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1795                                 /*isSigned=*/false),
1796       OMPBuilder.getOrCreateInternalVariable(
1797           CGM.VoidPtrPtrTy,
1798           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1799   return Address(
1800       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1801           CGF.EmitRuntimeCall(
1802               OMPBuilder.getOrCreateRuntimeFunction(
1803                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1804               Args),
1805           CGF.Builder.getPtrTy(0)),
1806       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1807 }
1808 
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)1809 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1810                                    const RegionCodeGenTy &ThenGen,
1811                                    const RegionCodeGenTy &ElseGen) {
1812   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1813 
1814   // If the condition constant folds and can be elided, try to avoid emitting
1815   // the condition and the dead arm of the if/else.
1816   bool CondConstant;
1817   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1818     if (CondConstant)
1819       ThenGen(CGF);
1820     else
1821       ElseGen(CGF);
1822     return;
1823   }
1824 
1825   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1826   // emit the conditional branch.
1827   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1828   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1829   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1830   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1831 
1832   // Emit the 'then' code.
1833   CGF.EmitBlock(ThenBlock);
1834   ThenGen(CGF);
1835   CGF.EmitBranch(ContBlock);
1836   // Emit the 'else' code if present.
1837   // There is no need to emit line number for unconditional branch.
1838   (void)ApplyDebugLocation::CreateEmpty(CGF);
1839   CGF.EmitBlock(ElseBlock);
1840   ElseGen(CGF);
1841   // There is no need to emit line number for unconditional branch.
1842   (void)ApplyDebugLocation::CreateEmpty(CGF);
1843   CGF.EmitBranch(ContBlock);
1844   // Emit the continuation block for code after the if.
1845   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1846 }
1847 
/// Emits the call sequence for a '#pragma omp parallel' region: a
/// __kmpc_fork_call of \p OutlinedFn, or — when the 'if' clause evaluates to
/// false — a serialized execution bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork with the outlined function and captured variables.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, branch between the two paths; otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
1918 
1919 // If we're inside an (outlined) parallel region, use the region info's
1920 // thread-ID variable (it is passed in a first argument of the outlined function
1921 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1922 // regular serial code region, get thread ID by calling kmp_int32
1923 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1924 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)1925 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1926                                              SourceLocation Loc) {
1927   if (auto *OMPRegionInfo =
1928           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1929     if (OMPRegionInfo->getThreadIDVariable())
1930       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931 
1932   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1933   QualType Int32Ty =
1934       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1936   CGF.EmitStoreOfScalar(ThreadID,
1937                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1938 
1939   return ThreadIDTemp;
1940 }
1941 
getCriticalRegionLock(StringRef CriticalName)1942 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1943   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1944   std::string Name = getName({Prefix, "var"});
1945   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1946 }
1947 
1948 namespace {
1949 /// Common pre(post)-action for different OpenMP constructs.
1950 class CommonActionTy final : public PrePostActionTy {
1951   llvm::FunctionCallee EnterCallee;
1952   ArrayRef<llvm::Value *> EnterArgs;
1953   llvm::FunctionCallee ExitCallee;
1954   ArrayRef<llvm::Value *> ExitArgs;
1955   bool Conditional;
1956   llvm::BasicBlock *ContBlock = nullptr;
1957 
1958 public:
CommonActionTy(llvm::FunctionCallee EnterCallee,ArrayRef<llvm::Value * > EnterArgs,llvm::FunctionCallee ExitCallee,ArrayRef<llvm::Value * > ExitArgs,bool Conditional=false)1959   CommonActionTy(llvm::FunctionCallee EnterCallee,
1960                  ArrayRef<llvm::Value *> EnterArgs,
1961                  llvm::FunctionCallee ExitCallee,
1962                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1963       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1964         ExitArgs(ExitArgs), Conditional(Conditional) {}
Enter(CodeGenFunction & CGF)1965   void Enter(CodeGenFunction &CGF) override {
1966     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1967     if (Conditional) {
1968       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1969       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1970       ContBlock = CGF.createBasicBlock("omp_if.end");
1971       // Generate the branch (If-stmt)
1972       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1973       CGF.EmitBlock(ThenBlock);
1974     }
1975   }
Done(CodeGenFunction & CGF)1976   void Done(CodeGenFunction &CGF) {
1977     // Emit the rest of blocks/branches
1978     CGF.EmitBranch(ContBlock);
1979     CGF.EmitBlock(ContBlock, true);
1980   }
Exit(CodeGenFunction & CGF)1981   void Exit(CodeGenFunction &CGF) override {
1982     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1983   }
1984 };
1985 } // anonymous namespace
1986 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)1987 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1988                                          StringRef CriticalName,
1989                                          const RegionCodeGenTy &CriticalOpGen,
1990                                          SourceLocation Loc, const Expr *Hint) {
1991   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1992   // CriticalOpGen();
1993   // __kmpc_end_critical(ident_t *, gtid, Lock);
1994   // Prepare arguments and build a call to __kmpc_critical
1995   if (!CGF.HaveInsertPoint())
1996     return;
1997   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1998                          getCriticalRegionLock(CriticalName)};
1999   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2000                                                 std::end(Args));
2001   if (Hint) {
2002     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2003         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2004   }
2005   CommonActionTy Action(
2006       OMPBuilder.getOrCreateRuntimeFunction(
2007           CGM.getModule(),
2008           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2009       EnterArgs,
2010       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2011                                             OMPRTL___kmpc_end_critical),
2012       Args);
2013   CriticalOpGen.setAction(Action);
2014   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2015 }
2016 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)2017 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2018                                        const RegionCodeGenTy &MasterOpGen,
2019                                        SourceLocation Loc) {
2020   if (!CGF.HaveInsertPoint())
2021     return;
2022   // if(__kmpc_master(ident_t *, gtid)) {
2023   //   MasterOpGen();
2024   //   __kmpc_end_master(ident_t *, gtid);
2025   // }
2026   // Prepare arguments and build a call to __kmpc_master
2027   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2028   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2029                             CGM.getModule(), OMPRTL___kmpc_master),
2030                         Args,
2031                         OMPBuilder.getOrCreateRuntimeFunction(
2032                             CGM.getModule(), OMPRTL___kmpc_end_master),
2033                         Args,
2034                         /*Conditional=*/true);
2035   MasterOpGen.setAction(Action);
2036   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2037   Action.Done(CGF);
2038 }
2039 
emitMaskedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MaskedOpGen,SourceLocation Loc,const Expr * Filter)2040 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2041                                        const RegionCodeGenTy &MaskedOpGen,
2042                                        SourceLocation Loc, const Expr *Filter) {
2043   if (!CGF.HaveInsertPoint())
2044     return;
2045   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2046   //   MaskedOpGen();
2047   //   __kmpc_end_masked(iden_t *, gtid);
2048   // }
2049   // Prepare arguments and build a call to __kmpc_masked
2050   llvm::Value *FilterVal = Filter
2051                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2052                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2053   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2054                          FilterVal};
2055   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2056                             getThreadID(CGF, Loc)};
2057   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2058                             CGM.getModule(), OMPRTL___kmpc_masked),
2059                         Args,
2060                         OMPBuilder.getOrCreateRuntimeFunction(
2061                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2062                         ArgsEnd,
2063                         /*Conditional=*/true);
2064   MaskedOpGen.setAction(Action);
2065   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2066   Action.Done(CGF);
2067 }
2068 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2069 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2070                                         SourceLocation Loc) {
2071   if (!CGF.HaveInsertPoint())
2072     return;
2073   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2074     OMPBuilder.createTaskyield(CGF.Builder);
2075   } else {
2076     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2077     llvm::Value *Args[] = {
2078         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2079         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2080     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2081                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2082                         Args);
2083   }
2084 
2085   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2086     Region->emitUntiedSwitch(CGF);
2087 }
2088 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)2089 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2090                                           const RegionCodeGenTy &TaskgroupOpGen,
2091                                           SourceLocation Loc) {
2092   if (!CGF.HaveInsertPoint())
2093     return;
2094   // __kmpc_taskgroup(ident_t *, gtid);
2095   // TaskgroupOpGen();
2096   // __kmpc_end_taskgroup(ident_t *, gtid);
2097   // Prepare arguments and build a call to __kmpc_taskgroup
2098   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2099   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2100                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2101                         Args,
2102                         OMPBuilder.getOrCreateRuntimeFunction(
2103                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2104                         Args);
2105   TaskgroupOpGen.setAction(Action);
2106   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2107 }
2108 
2109 /// Given an array of pointers to variables, project the address of a
2110 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2111 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2112                                       unsigned Index, const VarDecl *Var) {
2113   // Pull out the pointer to the variable.
2114   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2115   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2116 
2117   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2118   return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2119 }
2120 
/// Emit the internal helper passed to __kmpc_copyprivate:
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* (one slot per copyprivate
/// variable, element type \p ArgsElemType); the helper assigns every RHS
/// element to the corresponding LHS element via \p AssignmentOps.
/// NOTE(review): the sole caller (emitSingleRegion) passes its SrcExprs into
/// \p DestExprs and its DstExprs into \p SrcExprs — confirm intent before
/// touching the parameter order.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper is module-internal; it is only ever called through the pointer
  // handed to __kmpc_copyprivate.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the opaque arguments as pointers to the void*[n] arrays:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // Element-wise copy with the user-visible assignment semantics:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copyprivate variable itself supplies the type the copy uses.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2176 
/// Emit code for the 'single' directive, including copyprivate support:
///   int32 did_it = 0;                       // only with copyprivate
///   if (__kmpc_single(loc, gtid)) {
///     <single region body>;
///     did_it = 1;
///     __kmpc_end_single(loc, gtid);
///   }
///   __kmpc_copyprivate(loc, gtid, buf_size, list, copy_func, did_it);
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one src/dst/assignment triple
  // per copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' records whether this thread executed the single region; every
  // thread passes it to __kmpc_copyprivate so the executing thread's values
  // can be broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded region, so only the thread that
    // executed the single body sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed into the helper's
    // DestExprs/SrcExprs parameters respectively; this matches the historical
    // code — verify before reordering.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2263 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2264 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2265                                         const RegionCodeGenTy &OrderedOpGen,
2266                                         SourceLocation Loc, bool IsThreads) {
2267   if (!CGF.HaveInsertPoint())
2268     return;
2269   // __kmpc_ordered(ident_t *, gtid);
2270   // OrderedOpGen();
2271   // __kmpc_end_ordered(ident_t *, gtid);
2272   // Prepare arguments and build a call to __kmpc_ordered
2273   if (IsThreads) {
2274     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276                               CGM.getModule(), OMPRTL___kmpc_ordered),
2277                           Args,
2278                           OMPBuilder.getOrCreateRuntimeFunction(
2279                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2280                           Args);
2281     OrderedOpGen.setAction(Action);
2282     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2283     return;
2284   }
2285   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2286 }
2287 
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2288 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2289   unsigned Flags;
2290   if (Kind == OMPD_for)
2291     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2292   else if (Kind == OMPD_sections)
2293     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2294   else if (Kind == OMPD_single)
2295     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2296   else if (Kind == OMPD_barrier)
2297     Flags = OMP_IDENT_BARRIER_EXPL;
2298   else
2299     Flags = OMP_IDENT_BARRIER_IMPL;
2300   return Flags;
2301 }
2302 
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2303 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2304     CodeGenFunction &CGF, const OMPLoopDirective &S,
2305     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2306   // Check if the loop directive is actually a doacross loop directive. In this
2307   // case choose static, 1 schedule.
2308   if (llvm::any_of(
2309           S.getClausesOfKind<OMPOrderedClause>(),
2310           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2311     ScheduleKind = OMPC_SCHEDULE_static;
2312     // Chunk size is 1 in this case.
2313     llvm::APInt ChunkSize(32, 1);
2314     ChunkExpr = IntegerLiteral::Create(
2315         CGF.getContext(), ChunkSize,
2316         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2317         SourceLocation());
2318   }
2319 }
2320 
/// Emit a barrier at \p Loc. When the OpenMPIRBuilder is enabled the builder
/// emits it; otherwise a call to __kmpc_barrier is built, or — inside a
/// cancellable region with \p ForceSimpleCall false — __kmpc_cancel_barrier,
/// whose nonzero result (when \p EmitChecks) branches to the construct's
/// cancellation destination.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags encode which construct implied this barrier.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups so destructors/finalization still run on
        // the cancellation path.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2372 
emitErrorCall(CodeGenFunction & CGF,SourceLocation Loc,Expr * ME,bool IsFatal)2373 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2374                                     Expr *ME, bool IsFatal) {
2375   llvm::Value *MVL =
2376       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2377          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2378   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2379   // *message)
2380   llvm::Value *Args[] = {
2381       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2382       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2383       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2384   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2385                           CGM.getModule(), OMPRTL___kmpc_error),
2386                       Args);
2387 }
2388 
2389 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2390 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2391                                           bool Chunked, bool Ordered) {
2392   switch (ScheduleKind) {
2393   case OMPC_SCHEDULE_static:
2394     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2395                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2396   case OMPC_SCHEDULE_dynamic:
2397     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2398   case OMPC_SCHEDULE_guided:
2399     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2400   case OMPC_SCHEDULE_runtime:
2401     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2402   case OMPC_SCHEDULE_auto:
2403     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2404   case OMPC_SCHEDULE_unknown:
2405     assert(!Chunked && "chunk was specified but schedule kind not known");
2406     return Ordered ? OMP_ord_static : OMP_sch_static;
2407   }
2408   llvm_unreachable("Unexpected runtime schedule");
2409 }
2410 
2411 /// Map the OpenMP distribute schedule to the runtime enumeration.
2412 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2413 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2414   // only static is allowed for dist_schedule
2415   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2416 }
2417 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2418 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2419                                          bool Chunked) const {
2420   OpenMPSchedType Schedule =
2421       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2422   return Schedule == OMP_sch_static;
2423 }
2424 
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2425 bool CGOpenMPRuntime::isStaticNonchunked(
2426     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2427   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2428   return Schedule == OMP_dist_sch_static;
2429 }
2430 
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2431 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2432                                       bool Chunked) const {
2433   OpenMPSchedType Schedule =
2434       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2435   return Schedule == OMP_sch_static_chunked;
2436 }
2437 
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2438 bool CGOpenMPRuntime::isStaticChunked(
2439     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2440   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2441   return Schedule == OMP_dist_sch_static_chunked;
2442 }
2443 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2444 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2445   OpenMPSchedType Schedule =
2446       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2447   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2448   return Schedule != OMP_sch_static;
2449 }
2450 
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2451 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2452                                   OpenMPScheduleClauseModifier M1,
2453                                   OpenMPScheduleClauseModifier M2) {
2454   int Modifier = 0;
2455   switch (M1) {
2456   case OMPC_SCHEDULE_MODIFIER_monotonic:
2457     Modifier = OMP_sch_modifier_monotonic;
2458     break;
2459   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2460     Modifier = OMP_sch_modifier_nonmonotonic;
2461     break;
2462   case OMPC_SCHEDULE_MODIFIER_simd:
2463     if (Schedule == OMP_sch_static_chunked)
2464       Schedule = OMP_sch_static_balanced_chunked;
2465     break;
2466   case OMPC_SCHEDULE_MODIFIER_last:
2467   case OMPC_SCHEDULE_MODIFIER_unknown:
2468     break;
2469   }
2470   switch (M2) {
2471   case OMPC_SCHEDULE_MODIFIER_monotonic:
2472     Modifier = OMP_sch_modifier_monotonic;
2473     break;
2474   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2475     Modifier = OMP_sch_modifier_nonmonotonic;
2476     break;
2477   case OMPC_SCHEDULE_MODIFIER_simd:
2478     if (Schedule == OMP_sch_static_chunked)
2479       Schedule = OMP_sch_static_balanced_chunked;
2480     break;
2481   case OMPC_SCHEDULE_MODIFIER_last:
2482   case OMPC_SCHEDULE_MODIFIER_unknown:
2483     break;
2484   }
2485   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2486   // If the static schedule kind is specified or if the ordered clause is
2487   // specified, and if the nonmonotonic modifier is not specified, the effect is
2488   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2489   // modifier is specified, the effect is as if the nonmonotonic modifier is
2490   // specified.
2491   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2492     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2493           Schedule == OMP_sch_static_balanced_chunked ||
2494           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2495           Schedule == OMP_dist_sch_static_chunked ||
2496           Schedule == OMP_dist_sch_static))
2497       Modifier = OMP_sch_modifier_nonmonotonic;
2498   }
2499   return Schedule | Modifier;
2500 }
2501 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. \p IVSize/\p IVSigned select the 32/64-bit
/// signed/unsigned runtime entry point; \p DispatchValues carries the loop
/// bounds and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must not reach the dispatch path unless ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2535 
emitForDispatchDeinit(CodeGenFunction & CGF,SourceLocation Loc)2536 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2537                                             SourceLocation Loc) {
2538   if (!CGF.HaveInsertPoint())
2539     return;
2540   // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2541   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2542   CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2543 }
2544 
emitForStaticInitCall(CodeGenFunction & CGF,llvm::Value * UpdateLocation,llvm::Value * ThreadId,llvm::FunctionCallee ForStaticInitFunction,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2,const CGOpenMPRuntime::StaticRTInput & Values)2545 static void emitForStaticInitCall(
2546     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2547     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2548     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2549     const CGOpenMPRuntime::StaticRTInput &Values) {
2550   if (!CGF.HaveInsertPoint())
2551     return;
2552 
2553   assert(!Values.Ordered);
2554   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2555          Schedule == OMP_sch_static_balanced_chunked ||
2556          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2557          Schedule == OMP_dist_sch_static ||
2558          Schedule == OMP_dist_sch_static_chunked);
2559 
2560   // Call __kmpc_for_static_init(
2561   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2562   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2563   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2564   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2565   llvm::Value *Chunk = Values.Chunk;
2566   if (Chunk == nullptr) {
2567     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2568             Schedule == OMP_dist_sch_static) &&
2569            "expected static non-chunked schedule");
2570     // If the Chunk was not specified in the clause - use default value 1.
2571     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2572   } else {
2573     assert((Schedule == OMP_sch_static_chunked ||
2574             Schedule == OMP_sch_static_balanced_chunked ||
2575             Schedule == OMP_ord_static_chunked ||
2576             Schedule == OMP_dist_sch_static_chunked) &&
2577            "expected static chunked schedule");
2578   }
2579   llvm::Value *Args[] = {
2580       UpdateLocation,
2581       ThreadId,
2582       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2583                                                   M2)), // Schedule type
2584       Values.IL.emitRawPointer(CGF),                    // &isLastIter
2585       Values.LB.emitRawPointer(CGF),                    // &LB
2586       Values.UB.emitRawPointer(CGF),                    // &UB
2587       Values.ST.emitRawPointer(CGF),                    // &Stride
2588       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2589       Chunk                                             // Chunk
2590   };
2591   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2592 }
2593 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)2594 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2595                                         SourceLocation Loc,
2596                                         OpenMPDirectiveKind DKind,
2597                                         const OpenMPScheduleTy &ScheduleKind,
2598                                         const StaticRTInput &Values) {
2599   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2600       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2601   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2602          "Expected loop-based or sections-based directive.");
2603   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2604                                              isOpenMPLoopDirective(DKind)
2605                                                  ? OMP_IDENT_WORK_LOOP
2606                                                  : OMP_IDENT_WORK_SECTIONS);
2607   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2608   llvm::FunctionCallee StaticInitFunction =
2609       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2610                                              false);
2611   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2612   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2613                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2614 }
2615 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const CGOpenMPRuntime::StaticRTInput & Values)2616 void CGOpenMPRuntime::emitDistributeStaticInit(
2617     CodeGenFunction &CGF, SourceLocation Loc,
2618     OpenMPDistScheduleClauseKind SchedKind,
2619     const CGOpenMPRuntime::StaticRTInput &Values) {
2620   OpenMPSchedType ScheduleNum =
2621       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2622   llvm::Value *UpdatedLocation =
2623       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2624   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2625   llvm::FunctionCallee StaticInitFunction;
2626   bool isGPUDistribute =
2627       CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2628   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2629       Values.IVSize, Values.IVSigned, isGPUDistribute);
2630 
2631   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2632                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2633                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2634 }
2635 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)2636 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2637                                           SourceLocation Loc,
2638                                           OpenMPDirectiveKind DKind) {
2639   assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2640           DKind == OMPD_sections) &&
2641          "Expected distribute, for, or sections directive kind");
2642   if (!CGF.HaveInsertPoint())
2643     return;
2644   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2645   llvm::Value *Args[] = {
2646       emitUpdateLocation(CGF, Loc,
2647                          isOpenMPDistributeDirective(DKind) ||
2648                                  (DKind == OMPD_target_teams_loop)
2649                              ? OMP_IDENT_WORK_DISTRIBUTE
2650                          : isOpenMPLoopDirective(DKind)
2651                              ? OMP_IDENT_WORK_LOOP
2652                              : OMP_IDENT_WORK_SECTIONS),
2653       getThreadID(CGF, Loc)};
2654   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2655   if (isOpenMPDistributeDirective(DKind) &&
2656       CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2657     CGF.EmitRuntimeCall(
2658         OMPBuilder.getOrCreateRuntimeFunction(
2659             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2660         Args);
2661   else
2662     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2663                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2664                         Args);
2665 }
2666 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2667 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2668                                                  SourceLocation Loc,
2669                                                  unsigned IVSize,
2670                                                  bool IVSigned) {
2671   if (!CGF.HaveInsertPoint())
2672     return;
2673   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2674   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2675   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2676                       Args);
2677 }
2678 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)2679 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2680                                           SourceLocation Loc, unsigned IVSize,
2681                                           bool IVSigned, Address IL,
2682                                           Address LB, Address UB,
2683                                           Address ST) {
2684   // Call __kmpc_dispatch_next(
2685   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2686   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2687   //          kmp_int[32|64] *p_stride);
2688   llvm::Value *Args[] = {
2689       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2690       IL.emitRawPointer(CGF), // &isLastIter
2691       LB.emitRawPointer(CGF), // &Lower
2692       UB.emitRawPointer(CGF), // &Upper
2693       ST.emitRawPointer(CGF)  // &Stride
2694   };
2695   llvm::Value *Call = CGF.EmitRuntimeCall(
2696       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2697   return CGF.EmitScalarConversion(
2698       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2699       CGF.getContext().BoolTy, Loc);
2700 }
2701 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2702 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2703                                            llvm::Value *NumThreads,
2704                                            SourceLocation Loc) {
2705   if (!CGF.HaveInsertPoint())
2706     return;
2707   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2708   llvm::Value *Args[] = {
2709       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2710       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2711   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2712                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2713                       Args);
2714 }
2715 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2716 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2717                                          ProcBindKind ProcBind,
2718                                          SourceLocation Loc) {
2719   if (!CGF.HaveInsertPoint())
2720     return;
2721   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2722   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2723   llvm::Value *Args[] = {
2724       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2725       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2726   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2727                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2728                       Args);
2729 }
2730 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2731 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2732                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2733   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2734     OMPBuilder.createFlush(CGF.Builder);
2735   } else {
2736     if (!CGF.HaveInsertPoint())
2737       return;
2738     // Build call void __kmpc_flush(ident_t *loc)
2739     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2740                             CGM.getModule(), OMPRTL___kmpc_flush),
2741                         emitUpdateLocation(CGF, Loc));
2742   }
2743 }
2744 
namespace {
/// Indexes of fields for type kmp_task_t.
/// The enumerator order must match the field order laid down by
/// createKmpTaskTRecordDecl(), since these values are used with
/// std::next(field_begin(), ...) to address individual fields.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2770 
createOffloadEntriesAndInfoMetadata()2771 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2772   // If we are in simd mode or there are no entries, we don't need to do
2773   // anything.
2774   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2775     return;
2776 
2777   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2778       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2779              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2780     SourceLocation Loc;
2781     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2782       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2783                 E = CGM.getContext().getSourceManager().fileinfo_end();
2784            I != E; ++I) {
2785         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2786             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2787           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2788               I->getFirst(), EntryInfo.Line, 1);
2789           break;
2790         }
2791       }
2792     }
2793     switch (Kind) {
2794     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2795       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2796           DiagnosticsEngine::Error, "Offloading entry for target region in "
2797                                     "%0 is incorrect: either the "
2798                                     "address or the ID is invalid.");
2799       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2800     } break;
2801     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2802       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2803           DiagnosticsEngine::Error, "Offloading entry for declare target "
2804                                     "variable %0 is incorrect: the "
2805                                     "address is invalid.");
2806       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2807     } break;
2808     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2809       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2810           DiagnosticsEngine::Error,
2811           "Offloading entry for declare target variable is incorrect: the "
2812           "address is invalid.");
2813       CGM.getDiags().Report(DiagID);
2814     } break;
2815     }
2816   };
2817 
2818   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2819 }
2820 
emitKmpRoutineEntryT(QualType KmpInt32Ty)2821 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2822   if (!KmpRoutineEntryPtrTy) {
2823     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2824     ASTContext &C = CGM.getContext();
2825     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2826     FunctionProtoType::ExtProtoInfo EPI;
2827     KmpRoutineEntryPtrQTy = C.getPointerType(
2828         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2829     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2830   }
2831 }
2832 
2833 namespace {
2834 struct PrivateHelpersTy {
PrivateHelpersTy__anon93cce0fb0e11::PrivateHelpersTy2835   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2836                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2837       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2838         PrivateElemInit(PrivateElemInit) {}
PrivateHelpersTy__anon93cce0fb0e11::PrivateHelpersTy2839   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2840   const Expr *OriginalRef = nullptr;
2841   const VarDecl *Original = nullptr;
2842   const VarDecl *PrivateCopy = nullptr;
2843   const VarDecl *PrivateElemInit = nullptr;
isLocalPrivate__anon93cce0fb0e11::PrivateHelpersTy2844   bool isLocalPrivate() const {
2845     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2846   }
2847 };
2848 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2849 } // anonymous namespace
2850 
isAllocatableDecl(const VarDecl * VD)2851 static bool isAllocatableDecl(const VarDecl *VD) {
2852   const VarDecl *CVD = VD->getCanonicalDecl();
2853   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2854     return false;
2855   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2856   // Use the default allocation.
2857   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2858            !AA->getAllocator());
2859 }
2860 
2861 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)2862 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2863   if (!Privates.empty()) {
2864     ASTContext &C = CGM.getContext();
2865     // Build struct .kmp_privates_t. {
2866     //         /*  private vars  */
2867     //       };
2868     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2869     RD->startDefinition();
2870     for (const auto &Pair : Privates) {
2871       const VarDecl *VD = Pair.second.Original;
2872       QualType Type = VD->getType().getNonReferenceType();
2873       // If the private variable is a local variable with lvalue ref type,
2874       // allocate the pointer instead of the pointee type.
2875       if (Pair.second.isLocalPrivate()) {
2876         if (VD->getType()->isLValueReferenceType())
2877           Type = C.getPointerType(Type);
2878         if (isAllocatableDecl(VD))
2879           Type = C.getPointerType(Type);
2880       }
2881       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2882       if (VD->hasAttrs()) {
2883         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2884              E(VD->getAttrs().end());
2885              I != E; ++I)
2886           FD->addAttr(*I);
2887       }
2888     }
2889     RD->completeDefinition();
2890     return RD;
2891   }
2892   return nullptr;
2893 }
2894 
2895 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)2896 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2897                          QualType KmpInt32Ty,
2898                          QualType KmpRoutineEntryPointerQTy) {
2899   ASTContext &C = CGM.getContext();
2900   // Build struct kmp_task_t {
2901   //         void *              shareds;
2902   //         kmp_routine_entry_t routine;
2903   //         kmp_int32           part_id;
2904   //         kmp_cmplrdata_t data1;
2905   //         kmp_cmplrdata_t data2;
2906   // For taskloops additional fields:
2907   //         kmp_uint64          lb;
2908   //         kmp_uint64          ub;
2909   //         kmp_int64           st;
2910   //         kmp_int32           liter;
2911   //         void *              reductions;
2912   //       };
2913   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2914   UD->startDefinition();
2915   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2916   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2917   UD->completeDefinition();
2918   QualType KmpCmplrdataTy = C.getRecordType(UD);
2919   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2920   RD->startDefinition();
2921   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2922   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2923   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2924   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2925   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2926   if (isOpenMPTaskLoopDirective(Kind)) {
2927     QualType KmpUInt64Ty =
2928         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2929     QualType KmpInt64Ty =
2930         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2931     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2932     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2933     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2934     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2935     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2936   }
2937   RD->completeDefinition();
2938   return RD;
2939 }
2940 
2941 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)2942 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2943                                      ArrayRef<PrivateDataTy> Privates) {
2944   ASTContext &C = CGM.getContext();
2945   // Build struct kmp_task_t_with_privates {
2946   //         kmp_task_t task_data;
2947   //         .kmp_privates_t. privates;
2948   //       };
2949   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2950   RD->startDefinition();
2951   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2952   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2953     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2954   RD->completeDefinition();
2955   return RD;
2956 }
2957 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the formal parameter list: (kmp_int32 gtid, task_t *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Create an internal-linkage function named ".omp_task_entry.".
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task argument; 'Base' below is the embedded kmp_task_t
  // header (first field of kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Pass part_id by address so the outlined task can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is field 1 of the wrapper when present; otherwise
  // pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to regular tasks and taskloops.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb/ub/st/liter/reductions, loaded from the
  // taskloop-only fields of kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3072 
/// Emit the task destructor thunk: a function with the task-entry signature
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) that runs the destructors
/// of all privatized fields stored in the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Formal parameters: the global thread id and the task descriptor pointer.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // Create an internal-linkage function named ".omp_task_destructor.".
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task pointer and step past field 0 (the kmp_task_t
  // header) to the privates record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destructor call for every private field whose type needs one.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3121 
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: const-restrict pointer to the .privates. record stored
  // inside the kmp_task_t allocation.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its 1-based position in Args (slot 0 is
  // the .privates. pointer) so the store loop below can pair record fields
  // with their out-parameters even though Privates is sorted by alignment.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    // One <ty> **noalias out-parameter per private variable.
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // References and allocatable declarations are stored indirectly, so the
    // parameter carries an extra level of pointer.
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This helper is trivial glue; force-inline it in optimized builds.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Privates and the record fields share one order; PrivateVarsPos maps the
    // field's original variable back to the matching out-parameter.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3230 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds record of the source task;
/// may be invalid when no copying from shareds is required.
/// \param TDBase LValue of the kmp_task_t-with-privates record to initialize.
/// \param ForDup true when called from the task_dup function (taskloop task
/// duplication), false when initializing a freshly allocated task.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t-with-privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializers must be
    // re-run; everything else was handled when the source task was created.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original variable.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds record,
          // restoring the declaration's natural alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures: emit the original reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: run the initializer with the source
          // element temporarily mapped to the shared value's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Private/lastprivate copy: emit its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3349 
3350 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3351 static bool checkInitIsRequired(CodeGenFunction &CGF,
3352                                 ArrayRef<PrivateDataTy> Privates) {
3353   bool InitRequired = false;
3354   for (const PrivateDataTy &Pair : Privates) {
3355     if (Pair.second.isLocalPrivate())
3356       continue;
3357     const VarDecl *VD = Pair.second.PrivateCopy;
3358     const Expr *Init = VD->getAnyInitializer();
3359     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3360                                     !CGF.isTrivialInitializer(Init));
3361     if (InitRequired)
3362       break;
3363   }
3364   return InitRequired;
3365 }
3366 
3367 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: (kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so firstprivates can be
    // copied from it. This TDBase shadows the destination one above.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3446 
3447 /// Checks if destructor function is required to be generated.
3448 /// \return true if cleanups are required, false otherwise.
3449 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD,ArrayRef<PrivateDataTy> Privates)3450 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3451                          ArrayRef<PrivateDataTy> Privates) {
3452   for (const PrivateDataTy &P : Privates) {
3453     if (P.second.isLocalPrivate())
3454       continue;
3455     QualType Ty = P.second.Original->getType().getNonReferenceType();
3456     if (Ty.isDestructedType())
3457       return true;
3458   }
3459   return false;
3460 }
3461 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and opens
/// one nested counter loop per iterator (leaving the insertion point inside
/// the innermost loop body); the destructor emits the counter increments,
/// back-edges, and exit blocks in reverse order to close the loop nest.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, filled by the constructor
  // and consumed (innermost first) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate upper bounds before privatizing so they see original values.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Choose signed vs unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops from the innermost iterator outward.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3537 
/// Compute the base address and the size in bytes of the storage denoted by
/// expression \p E, as a {pointer, size} pair of LLVM values.
/// Handles three shapes of expression:
///  - OMPArrayShapingExpr: size = sizeof(pointee) * product of dimensions;
///  - ArraySectionExpr: size = (addr past last element) - (first element);
///  - anything else: size = sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Element size times each (size_t-converted) shaping dimension.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size is the byte distance from the section's lower bound to one past
    // its upper bound.
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
3573 
3574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)3575 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3576   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3577   if (KmpTaskAffinityInfoTy.isNull()) {
3578     RecordDecl *KmpAffinityInfoRD =
3579         C.buildImplicitRecord("kmp_task_affinity_info_t");
3580     KmpAffinityInfoRD->startDefinition();
3581     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3582     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3583     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3584     KmpAffinityInfoRD->completeDefinition();
3585     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3586   }
3587 }
3588 
3589 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)3590 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3591                               const OMPExecutableDirective &D,
3592                               llvm::Function *TaskFunction, QualType SharedsTy,
3593                               Address Shareds, const OMPTaskDataTy &Data) {
3594   ASTContext &C = CGM.getContext();
3595   llvm::SmallVector<PrivateDataTy, 4> Privates;
3596   // Aggregate privates and sort them by the alignment.
3597   const auto *I = Data.PrivateCopies.begin();
3598   for (const Expr *E : Data.PrivateVars) {
3599     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3600     Privates.emplace_back(
3601         C.getDeclAlign(VD),
3602         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3603                          /*PrivateElemInit=*/nullptr));
3604     ++I;
3605   }
3606   I = Data.FirstprivateCopies.begin();
3607   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3608   for (const Expr *E : Data.FirstprivateVars) {
3609     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3610     Privates.emplace_back(
3611         C.getDeclAlign(VD),
3612         PrivateHelpersTy(
3613             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3614             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3615     ++I;
3616     ++IElemInitRef;
3617   }
3618   I = Data.LastprivateCopies.begin();
3619   for (const Expr *E : Data.LastprivateVars) {
3620     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3621     Privates.emplace_back(
3622         C.getDeclAlign(VD),
3623         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3624                          /*PrivateElemInit=*/nullptr));
3625     ++I;
3626   }
3627   for (const VarDecl *VD : Data.PrivateLocals) {
3628     if (isAllocatableDecl(VD))
3629       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3630     else
3631       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3632   }
3633   llvm::stable_sort(Privates,
3634                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3635                       return L.first > R.first;
3636                     });
3637   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3638   // Build type kmp_routine_entry_t (if not built yet).
3639   emitKmpRoutineEntryT(KmpInt32Ty);
3640   // Build type kmp_task_t (if not built yet).
3641   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3642     if (SavedKmpTaskloopTQTy.isNull()) {
3643       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3644           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3645     }
3646     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3647   } else {
3648     assert((D.getDirectiveKind() == OMPD_task ||
3649             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3650             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3651            "Expected taskloop, task or target directive");
3652     if (SavedKmpTaskTQTy.isNull()) {
3653       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3654           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3655     }
3656     KmpTaskTQTy = SavedKmpTaskTQTy;
3657   }
3658   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3659   // Build particular struct kmp_task_t for the given task.
3660   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3661       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3662   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3663   QualType KmpTaskTWithPrivatesPtrQTy =
3664       C.getPointerType(KmpTaskTWithPrivatesQTy);
3665   llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3666   llvm::Value *KmpTaskTWithPrivatesTySize =
3667       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3668   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3669 
3670   // Emit initial values for private copies (if any).
3671   llvm::Value *TaskPrivatesMap = nullptr;
3672   llvm::Type *TaskPrivatesMapTy =
3673       std::next(TaskFunction->arg_begin(), 3)->getType();
3674   if (!Privates.empty()) {
3675     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3676     TaskPrivatesMap =
3677         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3678     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3679         TaskPrivatesMap, TaskPrivatesMapTy);
3680   } else {
3681     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3682         cast<llvm::PointerType>(TaskPrivatesMapTy));
3683   }
3684   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3685   // kmp_task_t *tt);
3686   llvm::Function *TaskEntry = emitProxyTaskFunction(
3687       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3688       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3689       TaskPrivatesMap);
3690 
3691   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3692   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3693   // kmp_routine_entry_t *task_entry);
3694   // Task flags. Format is taken from
3695   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3696   // description of kmp_tasking_flags struct.
3697   enum {
3698     TiedFlag = 0x1,
3699     FinalFlag = 0x2,
3700     DestructorsFlag = 0x8,
3701     PriorityFlag = 0x20,
3702     DetachableFlag = 0x40,
3703   };
3704   unsigned Flags = Data.Tied ? TiedFlag : 0;
3705   bool NeedsCleanup = false;
3706   if (!Privates.empty()) {
3707     NeedsCleanup =
3708         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3709     if (NeedsCleanup)
3710       Flags = Flags | DestructorsFlag;
3711   }
3712   if (Data.Priority.getInt())
3713     Flags = Flags | PriorityFlag;
3714   if (D.hasClausesOfKind<OMPDetachClause>())
3715     Flags = Flags | DetachableFlag;
3716   llvm::Value *TaskFlags =
3717       Data.Final.getPointer()
3718           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3719                                      CGF.Builder.getInt32(FinalFlag),
3720                                      CGF.Builder.getInt32(/*C=*/0))
3721           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3722   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3723   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3724   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3725       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3726       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3727           TaskEntry, KmpRoutineEntryPtrTy)};
3728   llvm::Value *NewTask;
3729   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3730     // Check if we have any device clause associated with the directive.
3731     const Expr *Device = nullptr;
3732     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3733       Device = C->getDevice();
3734     // Emit device ID if any otherwise use default value.
3735     llvm::Value *DeviceID;
3736     if (Device)
3737       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3738                                            CGF.Int64Ty, /*isSigned=*/true);
3739     else
3740       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3741     AllocArgs.push_back(DeviceID);
3742     NewTask = CGF.EmitRuntimeCall(
3743         OMPBuilder.getOrCreateRuntimeFunction(
3744             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3745         AllocArgs);
3746   } else {
3747     NewTask =
3748         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3749                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3750                             AllocArgs);
3751   }
3752   // Emit detach clause initialization.
3753   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3754   // task_descriptor);
3755   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3756     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3757     LValue EvtLVal = CGF.EmitLValue(Evt);
3758 
3759     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3760     // int gtid, kmp_task_t *task);
3761     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3762     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3763     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3764     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3765         OMPBuilder.getOrCreateRuntimeFunction(
3766             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3767         {Loc, Tid, NewTask});
3768     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3769                                       Evt->getExprLoc());
3770     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3771   }
3772   // Process affinity clauses.
3773   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3774     // Process list of affinity data.
3775     ASTContext &C = CGM.getContext();
3776     Address AffinitiesArray = Address::invalid();
3777     // Calculate number of elements to form the array of affinity data.
3778     llvm::Value *NumOfElements = nullptr;
3779     unsigned NumAffinities = 0;
3780     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3781       if (const Expr *Modifier = C->getModifier()) {
3782         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3783         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3784           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3785           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3786           NumOfElements =
3787               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3788         }
3789       } else {
3790         NumAffinities += C->varlist_size();
3791       }
3792     }
3793     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3794     // Fields ids in kmp_task_affinity_info record.
3795     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3796 
3797     QualType KmpTaskAffinityInfoArrayTy;
3798     if (NumOfElements) {
3799       NumOfElements = CGF.Builder.CreateNUWAdd(
3800           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3801       auto *OVE = new (C) OpaqueValueExpr(
3802           Loc,
3803           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3804           VK_PRValue);
3805       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3806                                                     RValue::get(NumOfElements));
3807       KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3808           KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3809           /*IndexTypeQuals=*/0);
3810       // Properly emit variable-sized array.
3811       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3812                                            ImplicitParamKind::Other);
3813       CGF.EmitVarDecl(*PD);
3814       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3815       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3816                                                 /*isSigned=*/false);
3817     } else {
3818       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3819           KmpTaskAffinityInfoTy,
3820           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3821           ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3822       AffinitiesArray =
3823           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3824       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3825       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3826                                              /*isSigned=*/false);
3827     }
3828 
3829     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3830     // Fill array by elements without iterators.
3831     unsigned Pos = 0;
3832     bool HasIterator = false;
3833     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3834       if (C->getModifier()) {
3835         HasIterator = true;
3836         continue;
3837       }
3838       for (const Expr *E : C->varlist()) {
3839         llvm::Value *Addr;
3840         llvm::Value *Size;
3841         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3842         LValue Base =
3843             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3844                                KmpTaskAffinityInfoTy);
3845         // affs[i].base_addr = &<Affinities[i].second>;
3846         LValue BaseAddrLVal = CGF.EmitLValueForField(
3847             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3848         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3849                               BaseAddrLVal);
3850         // affs[i].len = sizeof(<Affinities[i].second>);
3851         LValue LenLVal = CGF.EmitLValueForField(
3852             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3853         CGF.EmitStoreOfScalar(Size, LenLVal);
3854         ++Pos;
3855       }
3856     }
3857     LValue PosLVal;
3858     if (HasIterator) {
3859       PosLVal = CGF.MakeAddrLValue(
3860           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3861           C.getSizeType());
3862       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3863     }
3864     // Process elements with iterators.
3865     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3866       const Expr *Modifier = C->getModifier();
3867       if (!Modifier)
3868         continue;
3869       OMPIteratorGeneratorScope IteratorScope(
3870           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3871       for (const Expr *E : C->varlist()) {
3872         llvm::Value *Addr;
3873         llvm::Value *Size;
3874         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3875         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3876         LValue Base =
3877             CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3878                                KmpTaskAffinityInfoTy);
3879         // affs[i].base_addr = &<Affinities[i].second>;
3880         LValue BaseAddrLVal = CGF.EmitLValueForField(
3881             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3882         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3883                               BaseAddrLVal);
3884         // affs[i].len = sizeof(<Affinities[i].second>);
3885         LValue LenLVal = CGF.EmitLValueForField(
3886             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3887         CGF.EmitStoreOfScalar(Size, LenLVal);
3888         Idx = CGF.Builder.CreateNUWAdd(
3889             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3890         CGF.EmitStoreOfScalar(Idx, PosLVal);
3891       }
3892     }
3893     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3894     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3895     // naffins, kmp_task_affinity_info_t *affin_list);
3896     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3897     llvm::Value *GTid = getThreadID(CGF, Loc);
3898     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3899         AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3900     // FIXME: Emit the function and ignore its result for now unless the
3901     // runtime function is properly implemented.
3902     (void)CGF.EmitRuntimeCall(
3903         OMPBuilder.getOrCreateRuntimeFunction(
3904             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3905         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3906   }
3907   llvm::Value *NewTaskNewTaskTTy =
3908       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3909           NewTask, KmpTaskTWithPrivatesPtrTy);
3910   LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3911                                                   KmpTaskTWithPrivatesQTy);
3912   LValue TDBase =
3913       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3914   // Fill the data in the resulting kmp_task_t record.
3915   // Copy shareds if there are any.
3916   Address KmpTaskSharedsPtr = Address::invalid();
3917   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3918     KmpTaskSharedsPtr = Address(
3919         CGF.EmitLoadOfScalar(
3920             CGF.EmitLValueForField(
3921                 TDBase,
3922                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3923             Loc),
3924         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3925     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3926     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3927     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3928   }
3929   // Emit initial values for private copies (if any).
3930   TaskResultTy Result;
3931   if (!Privates.empty()) {
3932     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3933                      SharedsTy, SharedsPtrTy, Data, Privates,
3934                      /*ForDup=*/false);
3935     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3936         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3937       Result.TaskDupFn = emitTaskDupFunction(
3938           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3939           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3940           /*WithLastIter=*/!Data.LastprivateVars.empty());
3941     }
3942   }
3943   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3944   enum { Priority = 0, Destructors = 1 };
3945   // Provide pointer to function with destructors for privates.
3946   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3947   const RecordDecl *KmpCmplrdataUD =
3948       (*FI)->getType()->getAsUnionType()->getDecl();
3949   if (NeedsCleanup) {
3950     llvm::Value *DestructorFn = emitDestructorsFunction(
3951         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3952         KmpTaskTWithPrivatesQTy);
3953     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3954     LValue DestructorsLV = CGF.EmitLValueForField(
3955         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3956     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3957                               DestructorFn, KmpRoutineEntryPtrTy),
3958                           DestructorsLV);
3959   }
3960   // Set priority.
3961   if (Data.Priority.getInt()) {
3962     LValue Data2LV = CGF.EmitLValueForField(
3963         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3964     LValue PriorityLV = CGF.EmitLValueForField(
3965         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3966     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3967   }
3968   Result.NewTask = NewTask;
3969   Result.TaskEntry = TaskEntry;
3970   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3971   Result.TDBase = TDBase;
3972   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3973   return Result;
3974 }
3975 
3976 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)3977 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3978   RTLDependenceKindTy DepKind;
3979   switch (K) {
3980   case OMPC_DEPEND_in:
3981     DepKind = RTLDependenceKindTy::DepIn;
3982     break;
3983   // Out and InOut dependencies must use the same code.
3984   case OMPC_DEPEND_out:
3985   case OMPC_DEPEND_inout:
3986     DepKind = RTLDependenceKindTy::DepInOut;
3987     break;
3988   case OMPC_DEPEND_mutexinoutset:
3989     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3990     break;
3991   case OMPC_DEPEND_inoutset:
3992     DepKind = RTLDependenceKindTy::DepInOutSet;
3993     break;
3994   case OMPC_DEPEND_outallmemory:
3995     DepKind = RTLDependenceKindTy::DepOmpAllMem;
3996     break;
3997   case OMPC_DEPEND_source:
3998   case OMPC_DEPEND_sink:
3999   case OMPC_DEPEND_depobj:
4000   case OMPC_DEPEND_inoutallmemory:
4001   case OMPC_DEPEND_unknown:
4002     llvm_unreachable("Unknown task dependence type");
4003   }
4004   return DepKind;
4005 }
4006 
4007 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4008 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4009                            QualType &FlagsTy) {
4010   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4011   if (KmpDependInfoTy.isNull()) {
4012     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4013     KmpDependInfoRD->startDefinition();
4014     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4015     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4016     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4017     KmpDependInfoRD->completeDefinition();
4018     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4019   }
4020 }
4021 
/// Returns the number of kmp_depend_info elements stored in a depobj and an
/// lvalue for the first element. The count is read from the base_addr field of
/// the record placed one element *before* the first dependency record (the
/// depobj header slot written by emitDepobjDependClause).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // The depobj variable holds a pointer to the first kmp_depend_info record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to reach the header record holding the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4048 
/// Emits one kmp_depend_info record (base_addr, len, flags) per dependency
/// expression in \p Data into \p DependenciesArray, starting at the position
/// designated by \p Pos. \p Pos is either a compile-time slot index
/// (unsigned *) for clauses without an iterator modifier, or an lvalue holding
/// a runtime counter (LValue *) when the emission happens inside generated
/// iterator loops and the slot is only known at run time.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, open the generated loop nest over
  // the iterator space; the scope's destructor closes the loops.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      // omp_all_memory is encoded as a zero address/length record.
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      // Statically known slot index.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime slot index loaded from the position counter.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the position: bump the static index, or emit a runtime
    // load/add/store of the counter.
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4119 
/// Computes, for each depobj expression in \p Data, the number of
/// kmp_depend_info elements stored in that depobj. The per-expression counts
/// are accumulated into stack temporaries inside the (optional) iterator loop
/// nest and loaded back once the loops are closed, so the returned values are
/// usable in the surrounding scope.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Open the iterator loop nest if the clause has an iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      // Read the element count from the depobj header.
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // Zero-initialize the accumulator, then add this depobj's count.
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated sizes outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4157 
/// Copies the kmp_depend_info records stored in each depobj of \p Data into
/// \p DependenciesArray, starting at the runtime position held by \p PosLVal,
/// and advances that position counter by the number of copied elements.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  // Size in bytes of one kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Open the iterator loop nest if the clause has an iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      // Element count and address of the first record in this depobj.
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4193 
/// Builds the flat kmp_depend_info array for all depend clauses of a task and
/// returns (number of elements, pointer to the array). Returns
/// (nullptr, invalid) when there are no dependencies. The array is filled in
/// three passes: regular deps without iterators (static slots), regular deps
/// with iterators (runtime counter), then the contents of depobj deps
/// (memcpy'ed after the others).
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only regular deps without iterator modifiers; depobj
  // and iterator-based deps are counted at run time below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum the element counts stored in each referenced depobj.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Iterator space size = product of the per-iterator upper bounds.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      // Each iteration emits one record per dependency expression.
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time: emit a variable-length array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: use a constant-sized stack array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: regular dependencies without iterators go into static slots.
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Pass 2: switch to a runtime position counter seeded with Pos.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // Pass 3: append the contents of each depobj.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4317 
/// Builds the heap-allocated kmp_depend_info array for an 'omp depobj'
/// construct. One extra leading element is reserved as a header whose
/// base_addr field stores the number of dependency records; the returned
/// address points past that header, at the first record (matching what
/// getDepobjElements reads back at index -1).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the element count is the product of the iterator
    // upper bounds, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element, times the aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: allocation size is sizeof(kmp_depend_info[N + 1]).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.Builder.getPtrTy(0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the header). With an
  // iterator modifier the slot index must be a runtime counter; otherwise a
  // static index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer to the first dependency record, past the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4404 
// Emits the teardown for an 'omp depobj' destroy clause: frees the
// heap-allocated kmp_depend_info array that backs the depend object by
// calling __kmpc_free with the default allocator.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Materialize KmpDependInfoTy/FlagsTy for this translation unit.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the pointer stored in the depobj variable; it points into the
  // runtime-allocated dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // The stored pointer addresses element 1 of the allocation (element 0 holds
  // the dependency count — see the matching alloc path), so step back one
  // kmp_depend_info element to recover the base address that was returned by
  // __kmpc_alloc and must be passed to __kmpc_free.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
4431 
// Emits an 'omp depobj' update clause: rewrites the 'flags' field of every
// kmp_depend_info entry in the depend object's array to the new dependency
// kind. Generates an explicit IR loop (PHI over the element pointer).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Fetch the element count and the lvalue of the first dependency entry
  // stored in the depobj.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body executes before the exit test, so at least one
  // element is always processed — this presumes a depobj holds >= 1 entry.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Current-element pointer: incoming Begin from the entry edge, and the
  // advanced pointer from the back edge (added below).
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4480 
// Emits a '#pragma omp task' call. Initializes the task via emitTaskInit,
// then emits either the deferred path (__kmpc_omp_task[_with_deps]) or, when
// an 'if' clause evaluates false, the undeferred path
// (__kmpc_omp_task_begin_if0 / direct task-entry call / _complete_if0),
// selecting between them with emitIfClause.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object (shareds copied in, etc.).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    // No noalias dependences are emitted: count 0, null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Deferred path: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // Undeferred path (if-clause false): wait on dependences, then execute the
  // task body inline between begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4600 
// Emits a '#pragma omp taskloop' call: initializes the task object, stores
// loop bounds/stride and the reductions pointer into the kmp_task_t, then
// calls __kmpc_taskloop (or __kmpc_taskloop_5 when a grainsize/num_tasks
// modifier is present).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike emitTaskCall, the 'if' clause is passed to the runtime as an
  // integer argument rather than selecting between two codegen paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // object from the directive's captured bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for the runtime call.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule: pointer is the grainsize/num_tasks value; the int bit
      // distinguishes num_tasks (set) from grainsize (clear).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
  if (Data.HasModifier)
    TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));

  // task_dup callback (or null) is always the last argument.
  TaskArgs.push_back(Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               Result.TaskDupFn, CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), Data.HasModifier
                                               ? OMPRTL___kmpc_taskloop_5
                                               : OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
4689 
4690 /// Emit reduction operation for each element of array (required for
4691 /// array sections) LHS op = RHS.
4692 /// \param Type Type of array.
4693 /// \param LHSVar Variable on the left side of the reduction operation
4694 /// (references element of array in original variable).
4695 /// \param RHSVar Variable on the right side of the reduction operation
4696 /// (references element of array in original variable).
4697 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4698 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Unlike emitUpdateClause's loop, the emptiness test is emitted up front,
  // so zero-length arrays skip the body entirely.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs tracking the current source (RHS) and destination (LHS) element
  // pointers; back-edge incoming values are added after the body below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element so RedOpGen's
  // expressions (which reference the variables) operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4772 
4773 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4774 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4775 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)4776 static void emitReductionCombiner(CodeGenFunction &CGF,
4777                                   const Expr *ReductionOp) {
4778   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4779     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4780       if (const auto *DRE =
4781               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4782         if (const auto *DRD =
4783                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4784           std::pair<llvm::Function *, llvm::Function *> Reduction =
4785               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4786           RValue Func = RValue::get(Reduction.first);
4787           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4788           CGF.EmitIgnoredExpr(ReductionOp);
4789           return;
4790         }
4791   CGF.EmitIgnoredExpr(ReductionOp);
4792 }
4793 
// Synthesizes the internal 'void reduction_func(void *LHSArg, void *RHSArg)'
// used by the OpenMP runtime to combine thread-private reduction copies.
// LHSArg/RHSArg are arrays of void* element pointers; each pair is combined
// with the corresponding ReductionOps entry.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Build the function body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // First pass: remap each LHS/RHS variable to its slot in the argument
  // arrays. Idx tracks the array slot; VLAs consume an extra slot that holds
  // the dynamic size, which is bound to the VLA's size OpaqueValueExpr.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit each combiner; arrays go element-by-element.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4883 
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)4884 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4885                                                   const Expr *ReductionOp,
4886                                                   const Expr *PrivateRef,
4887                                                   const DeclRefExpr *LHS,
4888                                                   const DeclRefExpr *RHS) {
4889   if (PrivateRef->getType()->isArrayType()) {
4890     // Emit reduction for array section.
4891     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4892     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4893     EmitOMPAggregateReduction(
4894         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4895         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4896           emitReductionCombiner(CGF, ReductionOp);
4897         });
4898   } else {
4899     // Emit reduction for array subscript or single variable.
4900     emitReductionCombiner(CGF, ReductionOp);
4901   }
4902 }
4903 
4904 static std::string generateUniqueName(CodeGenModule &CGM,
4905                                       llvm::StringRef Prefix, const Expr *Ref);
4906 
emitPrivateReduction(CodeGenFunction & CGF,SourceLocation Loc,const Expr * Privates,const Expr * LHSExprs,const Expr * RHSExprs,const Expr * ReductionOps)4907 void CGOpenMPRuntime::emitPrivateReduction(
4908     CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4909     const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4910 
4911   //  Create a shared global variable (__shared_reduction_var) to accumulate the
4912   //  final result.
4913   //
4914   //  Call __kmpc_barrier to synchronize threads before initialization.
4915   //
4916   //  The master thread (thread_id == 0) initializes __shared_reduction_var
4917   //    with the identity value or initializer.
4918   //
4919   //  Call __kmpc_barrier to synchronize before combining.
4920   //  For each i:
4921   //    - Thread enters critical section.
4922   //    - Reads its private value from LHSExprs[i].
4923   //    - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4924   //    Privates[i]).
4925   //    - Exits critical section.
4926   //
4927   //  Call __kmpc_barrier after combining.
4928   //
4929   //  Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4930   //
4931   //  Final __kmpc_barrier to synchronize after broadcasting
4932   QualType PrivateType = Privates->getType();
4933   llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4934 
4935   const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4936   std::string ReductionVarNameStr;
4937   if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4938     ReductionVarNameStr =
4939         generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4940   else
4941     ReductionVarNameStr = "unnamed_priv_var";
4942 
4943   // Create an internal shared variable
4944   std::string SharedName =
4945       CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4946   llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4947       LLVMType, ".omp.reduction." + SharedName);
4948 
4949   SharedVar->setAlignment(
4950       llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4951 
4952   Address SharedResult =
4953       CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4954 
4955   llvm::Value *ThreadId = getThreadID(CGF, Loc);
4956   llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4957   llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4958 
4959   llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4960   llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4961 
4962   llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4963       ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4964   CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
4965 
4966   CGF.EmitBlock(InitBB);
4967 
4968   auto EmitSharedInit = [&]() {
4969     if (UDR) { // Check if it's a User-Defined Reduction
4970       if (const Expr *UDRInitExpr = UDR->getInitializer()) {
4971         std::pair<llvm::Function *, llvm::Function *> FnPair =
4972             getUserDefinedReduction(UDR);
4973         llvm::Function *InitializerFn = FnPair.second;
4974         if (InitializerFn) {
4975           if (const auto *CE =
4976                   dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
4977             const auto *OutDRE = cast<DeclRefExpr>(
4978                 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
4979                     ->getSubExpr());
4980             const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
4981 
4982             CodeGenFunction::OMPPrivateScope LocalScope(CGF);
4983             LocalScope.addPrivate(OutVD, SharedResult);
4984 
4985             (void)LocalScope.Privatize();
4986             if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
4987                     CE->getCallee()->IgnoreParenImpCasts())) {
4988               CodeGenFunction::OpaqueValueMapping OpaqueMap(
4989                   CGF, OVE, RValue::get(InitializerFn));
4990               CGF.EmitIgnoredExpr(CE);
4991             } else {
4992               CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
4993                                    PrivateType.getQualifiers(),
4994                                    /*IsInitializer=*/true);
4995             }
4996           } else {
4997             CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
4998                                  PrivateType.getQualifiers(),
4999                                  /*IsInitializer=*/true);
5000           }
5001         } else {
5002           CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5003                                PrivateType.getQualifiers(),
5004                                /*IsInitializer=*/true);
5005         }
5006       } else {
5007         // EmitNullInitialization handles default construction for C++ classes
5008         // and zeroing for scalars, which is a reasonable default.
5009         CGF.EmitNullInitialization(SharedResult, PrivateType);
5010       }
5011       return; // UDR initialization handled
5012     }
5013     if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5014       if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5015         if (const Expr *InitExpr = VD->getInit()) {
5016           CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5017                                PrivateType.getQualifiers(), true);
5018           return;
5019         }
5020       }
5021     }
5022     CGF.EmitNullInitialization(SharedResult, PrivateType);
5023   };
5024   EmitSharedInit();
5025   CGF.Builder.CreateBr(InitEndBB);
5026   CGF.EmitBlock(InitEndBB);
5027 
5028   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5029                           CGM.getModule(), OMPRTL___kmpc_barrier),
5030                       BarrierArgs);
5031 
5032   const Expr *ReductionOp = ReductionOps;
5033   const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5034   LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5035   LValue LHSLV = CGF.EmitLValue(Privates);
5036 
5037   auto EmitCriticalReduction = [&](auto ReductionGen) {
5038     std::string CriticalName = getName({"reduction_critical"});
5039     emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5040   };
5041 
5042   if (CurrentUDR) {
5043     // Handle user-defined reduction.
5044     auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5045       Action.Enter(CGF);
5046       std::pair<llvm::Function *, llvm::Function *> FnPair =
5047           getUserDefinedReduction(CurrentUDR);
5048       if (FnPair.first) {
5049         if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5050           const auto *OutDRE = cast<DeclRefExpr>(
5051               cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5052                   ->getSubExpr());
5053           const auto *InDRE = cast<DeclRefExpr>(
5054               cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5055                   ->getSubExpr());
5056           CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5057           LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5058                                 SharedLV.getAddress());
5059           LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5060                                 LHSLV.getAddress());
5061           (void)LocalScope.Privatize();
5062           emitReductionCombiner(CGF, ReductionOp);
5063         }
5064       }
5065     };
5066     EmitCriticalReduction(ReductionGen);
5067   } else {
5068     // Handle built-in reduction operations.
5069 #ifndef NDEBUG
5070     const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5071     if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5072       ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5073 
5074     const Expr *AssignRHS = nullptr;
5075     if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5076       if (BinOp->getOpcode() == BO_Assign)
5077         AssignRHS = BinOp->getRHS();
5078     } else if (const auto *OpCall =
5079                    dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5080       if (OpCall->getOperator() == OO_Equal)
5081         AssignRHS = OpCall->getArg(1);
5082     }
5083 
5084     assert(AssignRHS &&
5085            "Private Variable Reduction : Invalid ReductionOp expression");
5086 #endif
5087 
5088     auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5089       Action.Enter(CGF);
5090       const auto *OmpOutDRE =
5091           dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5092       const auto *OmpInDRE =
5093           dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5094       assert(
5095           OmpOutDRE && OmpInDRE &&
5096           "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5097       const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5098       const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5099       CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5100       LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5101       LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5102       (void)LocalScope.Privatize();
5103       // Emit the actual reduction operation
5104       CGF.EmitIgnoredExpr(ReductionOp);
5105     };
5106     EmitCriticalReduction(ReductionGen);
5107   }
5108 
5109   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5110                           CGM.getModule(), OMPRTL___kmpc_barrier),
5111                       BarrierArgs);
5112 
5113   // Broadcast final result
5114   bool IsAggregate = PrivateType->isAggregateType();
5115   LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5116   llvm::Value *FinalResultVal = nullptr;
5117   Address FinalResultAddr = Address::invalid();
5118 
5119   if (IsAggregate)
5120     FinalResultAddr = SharedResult;
5121   else
5122     FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5123 
5124   LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5125   if (IsAggregate) {
5126     CGF.EmitAggregateCopy(TargetLHSLV,
5127                           CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5128                           PrivateType, AggValueSlot::DoesNotOverlap, false);
5129   } else {
5130     CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5131   }
5132   // Final synchronization barrier
5133   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5134                           CGM.getModule(), OMPRTL___kmpc_barrier),
5135                       BarrierArgs);
5136 
5137   // Combiner with original list item
5138   auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5139                                   PrePostActionTy &Action) {
5140     Action.Enter(CGF);
5141     emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5142                                 cast<DeclRefExpr>(LHSExprs),
5143                                 cast<DeclRefExpr>(RHSExprs));
5144   };
5145   EmitCriticalReduction(OriginalListCombiner);
5146 }
5147 
// Emits the code for an OpenMP 'reduction' clause: packs the addresses of the
// per-thread (RHS) reduction items into a void* array, calls
// __kmpc_reduce{_nowait}() and switches on its result to run either the
// tree/critical-section combine (case 1) or the atomic combine (case 2).
// Entries whose Options.IsPrivateVarReduction flag is set are excluded from
// this shared-variable path and handled at the end via emitPrivateReduction().
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit each combiner directly, iterating
    // the four parallel arrays in lock-step.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the I-th per-thread item as a void*.
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA items take an extra slot carrying the element count, smuggled
      // through the void* slot via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // OMP_ATOMIC_REDUCE marks the ident_t so the runtime knows an atomic
  // fallback is available.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic combine of each item; captured ArrayRefs are iterated in
  // lock-step as in the SimpleReduction path above.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy exit action appends the matching
  // __kmpc_end_reduce{_nowait}() call after the combiner body.
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match "x = <expr>" to recover the pieces of a simple atomic
      // update; anything else falls back to a critical region below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path of the atomic update: re-evaluate UpExpr with
                // the LHS variable remapped to a temporary holding XRValue.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With 'nowait' the atomic path needs no __kmpc_end_reduce call.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
  // Finally handle the entries that were filtered out above: each flagged
  // item goes through the private-variable reduction path, using the
  // original (unfiltered) arrays so index I is meaningful.
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
                           OrgRHSExprs[I], OrgReductionOps[I]);
  }
}
5481 
5482 /// Generates unique name for artificial threadprivate variables.
5483 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5484 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5485                                       const Expr *Ref) {
5486   SmallString<256> Buffer;
5487   llvm::raw_svector_ostream Out(Buffer);
5488   const clang::DeclRefExpr *DE;
5489   const VarDecl *D = ::getBaseDecl(Ref, DE);
5490   if (!D)
5491     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5492   D = D->getCanonicalDecl();
5493   std::string Name = CGM.getOpenMPRuntime().getName(
5494       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5495   Out << Prefix << Name << "_"
5496       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5497   return std::string(Out.str());
5498 }
5499 
5500 /// Emits reduction initializer function:
5501 /// \code
5502 /// void @.red_init(void* %arg, void* %orig) {
5503 /// %0 = bitcast void* %arg to <type>*
5504 /// store <type> <init>, <type>* %0
5505 /// ret void
5506 /// }
5507 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict': %arg points at the private copy to
  // initialize, %orig at the original shared item.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Internal-linkage helper named "<sep>red_init<sep>" via the runtime's
  // name mangler.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  // Load %arg and treat it as a pointer to the private item's type.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5564 
5565 /// Emits reduction combiner function:
5566 /// \code
5567 /// void @.red_comb(void* %arg0, void* %arg1) {
5568 /// %lhs = bitcast void* %arg0 to <type>*
5569 /// %rhs = bitcast void* %arg1 to <type>*
5570 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5571 /// store <type> %2, <type>* %lhs
5572 /// ret void
5573 /// }
5574 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are the DeclRefExprs the combiner expression refers to; their
  // VarDecls get remapped to the two function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(CGF.Builder.getPtrTy(0)),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.Builder.getPtrTy(0)),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5640 
5641 /// Emits reduction finalizer function:
5642 /// \code
5643 /// void @.red_fini(void* %arg) {
5644 /// %0 = bitcast void* %arg to <type>*
5645 /// <destroy>(<type>* %0)
5646 /// ret void
5647 /// }
5648 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) when the item's type has no
  // cleanups; the caller stores a null pointer in reduce_fini instead.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single 'void *%arg' parameter: the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5688 
/// Emits the runtime initialization for task reductions: fills an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/original addresses, size, and init/fini/combiner callbacks) and
/// calls __kmpc_taskred_modifier_init or __kmpc_taskred_init. Returns the
/// opaque taskgroup pointer produced by the runtime, or nullptr if there is
/// nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null (no cleanups needed); store a null pointer in
    // that case so the runtime skips finalization.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 tells the runtime to use lazy (delayed) allocation for
    // items whose size is only known at run time (VLAs/array sections).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
5813 
/// Emits the finalization call for a task reduction with a `task_reduction`
/// modifier, paired with the __kmpc_taskred_modifier_init call emitted by
/// emitTaskReductionInit.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
5831 
/// For a reduction item whose size is only known at run time (VLA or array
/// section), stores that size into an artificial threadprivate variable so
/// the runtime-invoked init/fini/combiner callbacks can retrieve it (the
/// runtime ABI provides no way to pass the size directly).
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate size variable only when the size is non-constant
  // (Sizes.second != nullptr); constant-sized items need no fixup.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
5848 
/// Returns the address of the task-local private copy of a reduction item,
/// obtained from the runtime via __kmpc_task_reduction_get_th_data.
/// \param ReductionsPtr Opaque taskgroup pointer returned by the taskred init
///        call.
/// \param SharedLVal LValue of the shared (original) reduction item used as
///        the lookup key.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned pointer inherits the alignment of the shared item.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
5868 
/// Emits code for the 'taskwait' directive. Uses the OpenMPIRBuilder fast
/// path when enabled and there are no depend clauses; otherwise emits either
/// __kmpc_omp_taskwait_deps_51 (with dependences) or __kmpc_omp_taskwait.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause entries (if any) into a runtime array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Untied tasks may be resumed on another thread after a task scheduling
  // point; emit the resume switch if we are in such a region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5921 
/// Emits the body of an OpenMP directive inlined into the current function
/// (no outlining). The RAII object installs a temporary CapturedStmtInfo so
/// variable capture resolves correctly within the region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // critical/master/masked regions do not introduce a new untied-task
  // scheduling point, hence the last argument is false for them.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
5934 
namespace {
/// Cancellation kind codes passed to __kmpc_cancel /
/// __kmpc_cancellationpoint; values must match the runtime's expectations.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
5944 
getCancellationKind(OpenMPDirectiveKind CancelRegion)5945 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5946   RTCancelKind CancelKind = CancelNoreq;
5947   if (CancelRegion == OMPD_parallel)
5948     CancelKind = CancelParallel;
5949   else if (CancelRegion == OMPD_for)
5950     CancelKind = CancelLoop;
5951   else if (CancelRegion == OMPD_sections)
5952     CancelKind = CancelSections;
5953   else {
5954     assert(CancelRegion == OMPD_taskgroup);
5955     CancelKind = CancelTaskgroup;
5956   }
5957   return CancelKind;
5958 }
5959 
/// Emits code for the 'cancellation point' directive: queries the runtime and
/// branches out of the enclosing construct if cancellation was requested.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
5999 
/// Emits code for the 'cancel' directive, optionally guarded by an 'if'
/// clause: calls __kmpc_cancel and branches out of the enclosing construct
/// when the runtime reports that cancellation is active.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen is emitted either unconditionally or inside the 'if'-clause
    // then-branch; it captures by value/reference only entities that outlive
    // the emission of this directive.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: emit ThenGen conditionally, else-branch is empty.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6045 
namespace {
/// Cleanup action for uses_allocators support: initializes each
/// user-declared allocator on entry to the target region and destroys it on
/// exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // Pairs of (allocator expr, allocator-traits expr) from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emits __kmpc_init_allocator for every allocator/traits pair.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emits __kmpc_destroy_allocator for every allocator.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6073 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6074 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6075     const OMPExecutableDirective &D, StringRef ParentName,
6076     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6077     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6078   assert(!ParentName.empty() && "Invalid target entry parent name!");
6079   HasEmittedTargetRegion = true;
6080   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6081   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6082     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6083       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6084       if (!D.AllocatorTraits)
6085         continue;
6086       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6087     }
6088   }
6089   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6090   CodeGen.setAction(UsesAllocatorAction);
6091   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6092                                    IsOffloadEntry, CodeGen);
6093 }
6094 
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of trait entries = size of the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable is local to the target region; allocate it first.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6128 
/// Emits destruction of a single uses_allocators allocator: loads the stored
/// handle and calls __kmpc_destroy_allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the allocator handle back to void* for the runtime call.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6144 
/// Computes default min/max teams and threads for a target kernel from the
/// directive's clauses, then tightens the bounds using any ompx_attribute
/// clause (CUDA launch bounds or AMDGPU flat work-group size attributes).
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // -1 means "no upper bound specified by this attribute".
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      // Minima are raised to the largest requirement; maxima are lowered to
      // the smallest positive bound seen so far.
      Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
6183 
/// Generates the outlined target-region function through the
/// OpenMPIRBuilder, registers it as an offload entry when requested, and
/// applies target-specific attributes to the result.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  // The entry is keyed by the presumed source location so host and device
  // compilations agree on the same unique name.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback invoked by the builder with the final entry-function name; it
  // emits the captured statement body as the outlined function.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  cantFail(OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  // The builder may decide not to emit a function (e.g. host side of a
  // device-only entry); nothing more to do then.
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  // Propagate ompx_attribute AMDGPU waves-per-EU hints onto the function.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
6218 
6219 /// Checks if the expression is constant or does not have non-trivial function
6220 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6221 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6222   // We can skip constant expressions.
6223   // We can skip expressions with trivial calls or simple expressions.
6224   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6225           !E->hasNonTrivialCall(Ctx)) &&
6226          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6227 }
6228 
/// Peels compound statements and ignorable statements off \p Body and
/// returns the single significant child statement, or nullptr if there is
/// more than one. Used to detect SPMD-style nesting (e.g. a lone 'teams'
/// directive inside a 'target' region).
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Side-effect-free expressions do not count as significant children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is either a
        // non-variable declaration or an unused/global variable.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Recurse into nested compounds until a non-compound child is found.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6270 
getNumTeamsExprForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D,int32_t & MinTeamsVal,int32_t & MaxTeamsVal)6271 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6272     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6273     int32_t &MaxTeamsVal) {
6274 
6275   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6276   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6277          "Expected target-based executable directive.");
6278   switch (DirectiveKind) {
6279   case OMPD_target: {
6280     const auto *CS = D.getInnermostCapturedStmt();
6281     const auto *Body =
6282         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6283     const Stmt *ChildStmt =
6284         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6285     if (const auto *NestedDir =
6286             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6287       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6288         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6289           const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6290                                      ->getNumTeams()
6291                                      .front();
6292           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6293             if (auto Constant =
6294                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6295               MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6296           return NumTeams;
6297         }
6298         MinTeamsVal = MaxTeamsVal = 0;
6299         return nullptr;
6300       }
6301       MinTeamsVal = MaxTeamsVal = 1;
6302       return nullptr;
6303     }
6304     // A value of -1 is used to check if we need to emit no teams region
6305     MinTeamsVal = MaxTeamsVal = -1;
6306     return nullptr;
6307   }
6308   case OMPD_target_teams_loop:
6309   case OMPD_target_teams:
6310   case OMPD_target_teams_distribute:
6311   case OMPD_target_teams_distribute_simd:
6312   case OMPD_target_teams_distribute_parallel_for:
6313   case OMPD_target_teams_distribute_parallel_for_simd: {
6314     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6315       const Expr *NumTeams =
6316           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6317       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6318         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6319           MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6320       return NumTeams;
6321     }
6322     MinTeamsVal = MaxTeamsVal = 0;
6323     return nullptr;
6324   }
6325   case OMPD_target_parallel:
6326   case OMPD_target_parallel_for:
6327   case OMPD_target_parallel_for_simd:
6328   case OMPD_target_parallel_loop:
6329   case OMPD_target_simd:
6330     MinTeamsVal = MaxTeamsVal = 1;
6331     return nullptr;
6332   case OMPD_parallel:
6333   case OMPD_for:
6334   case OMPD_parallel_for:
6335   case OMPD_parallel_loop:
6336   case OMPD_parallel_master:
6337   case OMPD_parallel_sections:
6338   case OMPD_for_simd:
6339   case OMPD_parallel_for_simd:
6340   case OMPD_cancel:
6341   case OMPD_cancellation_point:
6342   case OMPD_ordered:
6343   case OMPD_threadprivate:
6344   case OMPD_allocate:
6345   case OMPD_task:
6346   case OMPD_simd:
6347   case OMPD_tile:
6348   case OMPD_unroll:
6349   case OMPD_sections:
6350   case OMPD_section:
6351   case OMPD_single:
6352   case OMPD_master:
6353   case OMPD_critical:
6354   case OMPD_taskyield:
6355   case OMPD_barrier:
6356   case OMPD_taskwait:
6357   case OMPD_taskgroup:
6358   case OMPD_atomic:
6359   case OMPD_flush:
6360   case OMPD_depobj:
6361   case OMPD_scan:
6362   case OMPD_teams:
6363   case OMPD_target_data:
6364   case OMPD_target_exit_data:
6365   case OMPD_target_enter_data:
6366   case OMPD_distribute:
6367   case OMPD_distribute_simd:
6368   case OMPD_distribute_parallel_for:
6369   case OMPD_distribute_parallel_for_simd:
6370   case OMPD_teams_distribute:
6371   case OMPD_teams_distribute_simd:
6372   case OMPD_teams_distribute_parallel_for:
6373   case OMPD_teams_distribute_parallel_for_simd:
6374   case OMPD_target_update:
6375   case OMPD_declare_simd:
6376   case OMPD_declare_variant:
6377   case OMPD_begin_declare_variant:
6378   case OMPD_end_declare_variant:
6379   case OMPD_declare_target:
6380   case OMPD_end_declare_target:
6381   case OMPD_declare_reduction:
6382   case OMPD_declare_mapper:
6383   case OMPD_taskloop:
6384   case OMPD_taskloop_simd:
6385   case OMPD_master_taskloop:
6386   case OMPD_master_taskloop_simd:
6387   case OMPD_parallel_master_taskloop:
6388   case OMPD_parallel_master_taskloop_simd:
6389   case OMPD_requires:
6390   case OMPD_metadirective:
6391   case OMPD_unknown:
6392     break;
6393   default:
6394     break;
6395   }
6396   llvm_unreachable("Unexpected directive kind.");
6397 }
6398 
emitNumTeamsForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D)6399 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6400     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6401   assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6402          "Clauses associated with the teams directive expected to be emitted "
6403          "only for the host!");
6404   CGBuilderTy &Bld = CGF.Builder;
6405   int32_t MinNT = -1, MaxNT = -1;
6406   const Expr *NumTeams =
6407       getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6408   if (NumTeams != nullptr) {
6409     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6410 
6411     switch (DirectiveKind) {
6412     case OMPD_target: {
6413       const auto *CS = D.getInnermostCapturedStmt();
6414       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6415       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6416       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6417                                                   /*IgnoreResultAssign*/ true);
6418       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6419                              /*isSigned=*/true);
6420     }
6421     case OMPD_target_teams:
6422     case OMPD_target_teams_distribute:
6423     case OMPD_target_teams_distribute_simd:
6424     case OMPD_target_teams_distribute_parallel_for:
6425     case OMPD_target_teams_distribute_parallel_for_simd: {
6426       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6427       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6428                                                   /*IgnoreResultAssign*/ true);
6429       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6430                              /*isSigned=*/true);
6431     }
6432     default:
6433       break;
6434     }
6435   }
6436 
6437   assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6438   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6439 }
6440 
6441 /// Check for a num threads constant value (stored in \p DefaultVal), or
6442 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6443 /// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6444 /// nullptr, no expression evaluation is perfomed.
getNumThreads(CodeGenFunction & CGF,const CapturedStmt * CS,const Expr ** E,int32_t & UpperBound,bool UpperBoundOnly,llvm::Value ** CondVal)6445 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6446                           const Expr **E, int32_t &UpperBound,
6447                           bool UpperBoundOnly, llvm::Value **CondVal) {
6448   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6449       CGF.getContext(), CS->getCapturedStmt());
6450   const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6451   if (!Dir)
6452     return;
6453 
6454   if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6455     // Handle if clause. If if clause present, the number of threads is
6456     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6457     if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6458       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6459       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6460       const OMPIfClause *IfClause = nullptr;
6461       for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6462         if (C->getNameModifier() == OMPD_unknown ||
6463             C->getNameModifier() == OMPD_parallel) {
6464           IfClause = C;
6465           break;
6466         }
6467       }
6468       if (IfClause) {
6469         const Expr *CondExpr = IfClause->getCondition();
6470         bool Result;
6471         if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6472           if (!Result) {
6473             UpperBound = 1;
6474             return;
6475           }
6476         } else {
6477           CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6478           if (const auto *PreInit =
6479                   cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6480             for (const auto *I : PreInit->decls()) {
6481               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6482                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6483               } else {
6484                 CodeGenFunction::AutoVarEmission Emission =
6485                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6486                 CGF.EmitAutoVarCleanups(Emission);
6487               }
6488             }
6489             *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6490           }
6491         }
6492       }
6493     }
6494     // Check the value of num_threads clause iff if clause was not specified
6495     // or is not evaluated to false.
6496     if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6497       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6498       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6499       const auto *NumThreadsClause =
6500           Dir->getSingleClause<OMPNumThreadsClause>();
6501       const Expr *NTExpr = NumThreadsClause->getNumThreads();
6502       if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6503         if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6504           UpperBound =
6505               UpperBound
6506                   ? Constant->getZExtValue()
6507                   : std::min(UpperBound,
6508                              static_cast<int32_t>(Constant->getZExtValue()));
6509       // If we haven't found a upper bound, remember we saw a thread limiting
6510       // clause.
6511       if (UpperBound == -1)
6512         UpperBound = 0;
6513       if (!E)
6514         return;
6515       CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6516       if (const auto *PreInit =
6517               cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6518         for (const auto *I : PreInit->decls()) {
6519           if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6520             CGF.EmitVarDecl(cast<VarDecl>(*I));
6521           } else {
6522             CodeGenFunction::AutoVarEmission Emission =
6523                 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6524             CGF.EmitAutoVarCleanups(Emission);
6525           }
6526         }
6527       }
6528       *E = NTExpr;
6529     }
6530     return;
6531   }
6532   if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6533     UpperBound = 1;
6534 }
6535 
getNumThreadsExprForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D,int32_t & UpperBound,bool UpperBoundOnly,llvm::Value ** CondVal,const Expr ** ThreadLimitExpr)6536 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6537     CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6538     bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6539   assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6540          "Clauses associated with the teams directive expected to be emitted "
6541          "only for the host!");
6542   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6543   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6544          "Expected target-based executable directive.");
6545 
6546   const Expr *NT = nullptr;
6547   const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6548 
6549   auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6550     if (E->isIntegerConstantExpr(CGF.getContext())) {
6551       if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6552         UpperBound = UpperBound ? Constant->getZExtValue()
6553                                 : std::min(UpperBound,
6554                                            int32_t(Constant->getZExtValue()));
6555     }
6556     // If we haven't found a upper bound, remember we saw a thread limiting
6557     // clause.
6558     if (UpperBound == -1)
6559       UpperBound = 0;
6560     if (EPtr)
6561       *EPtr = E;
6562   };
6563 
6564   auto ReturnSequential = [&]() {
6565     UpperBound = 1;
6566     return NT;
6567   };
6568 
6569   switch (DirectiveKind) {
6570   case OMPD_target: {
6571     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6572     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6573     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6574         CGF.getContext(), CS->getCapturedStmt());
6575     // TODO: The standard is not clear how to resolve two thread limit clauses,
6576     //       let's pick the teams one if it's present, otherwise the target one.
6577     const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6578     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6579       if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6580         ThreadLimitClause = TLC;
6581         if (ThreadLimitExpr) {
6582           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6583           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6584           CodeGenFunction::LexicalScope Scope(
6585               CGF,
6586               ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6587           if (const auto *PreInit =
6588                   cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6589             for (const auto *I : PreInit->decls()) {
6590               if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6591                 CGF.EmitVarDecl(cast<VarDecl>(*I));
6592               } else {
6593                 CodeGenFunction::AutoVarEmission Emission =
6594                     CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6595                 CGF.EmitAutoVarCleanups(Emission);
6596               }
6597             }
6598           }
6599         }
6600       }
6601     }
6602     if (ThreadLimitClause)
6603       CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6604                         ThreadLimitExpr);
6605     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6606       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6607           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6608         CS = Dir->getInnermostCapturedStmt();
6609         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6610             CGF.getContext(), CS->getCapturedStmt());
6611         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6612       }
6613       if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6614         CS = Dir->getInnermostCapturedStmt();
6615         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6616       } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6617         return ReturnSequential();
6618     }
6619     return NT;
6620   }
6621   case OMPD_target_teams: {
6622     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6623       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6624       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6625       CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6626                         ThreadLimitExpr);
6627     }
6628     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6629     getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6630     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6631         CGF.getContext(), CS->getCapturedStmt());
6632     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6633       if (Dir->getDirectiveKind() == OMPD_distribute) {
6634         CS = Dir->getInnermostCapturedStmt();
6635         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6636       }
6637     }
6638     return NT;
6639   }
6640   case OMPD_target_teams_distribute:
6641     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6642       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6643       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6644       CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6645                         ThreadLimitExpr);
6646     }
6647     getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6648                   UpperBoundOnly, CondVal);
6649     return NT;
6650   case OMPD_target_teams_loop:
6651   case OMPD_target_parallel_loop:
6652   case OMPD_target_parallel:
6653   case OMPD_target_parallel_for:
6654   case OMPD_target_parallel_for_simd:
6655   case OMPD_target_teams_distribute_parallel_for:
6656   case OMPD_target_teams_distribute_parallel_for_simd: {
6657     if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6658       const OMPIfClause *IfClause = nullptr;
6659       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6660         if (C->getNameModifier() == OMPD_unknown ||
6661             C->getNameModifier() == OMPD_parallel) {
6662           IfClause = C;
6663           break;
6664         }
6665       }
6666       if (IfClause) {
6667         const Expr *Cond = IfClause->getCondition();
6668         bool Result;
6669         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6670           if (!Result)
6671             return ReturnSequential();
6672         } else {
6673           CodeGenFunction::RunCleanupsScope Scope(CGF);
6674           *CondVal = CGF.EvaluateExprAsBool(Cond);
6675         }
6676       }
6677     }
6678     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6679       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6680       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6681       CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6682                         ThreadLimitExpr);
6683     }
6684     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6685       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6686       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6687       CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6688       return NumThreadsClause->getNumThreads();
6689     }
6690     return NT;
6691   }
6692   case OMPD_target_teams_distribute_simd:
6693   case OMPD_target_simd:
6694     return ReturnSequential();
6695   default:
6696     break;
6697   }
6698   llvm_unreachable("Unsupported directive kind.");
6699 }
6700 
/// Emit the i32 number-of-threads value for the target directive \p D on the
/// host, combining (in order) the num_threads expression or constant bound,
/// the if-clause condition, and the thread_limit expression.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  // -1 means "no bound recorded yet"; getNumThreadsExprForTargetDirective may
  // leave it at 0 ("limiting clause seen"), 1 (sequential) or a constant.
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    // Known sequential: emit the constant 1 directly.
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression was already handled
    // above, so clear it to avoid taking the minimum against itself below.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num teams expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
6757 
6758 namespace {
6759 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6760 
6761 // Utility to handle information from clauses associated with a given
6762 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6763 // It provides a convenient interface to obtain the information and generate
6764 // code for that information.
6765 class MappableExprsHandler {
6766 public:
6767   /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()6768   static unsigned getFlagMemberOffset() {
6769     unsigned Offset = 0;
6770     for (uint64_t Remain =
6771              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6772                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6773          !(Remain & 1); Remain = Remain >> 1)
6774       Offset++;
6775     return Offset;
6776   }
6777 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \param MapDecl Declaration the mapping refers to.
    /// \param MapExpr Original map-clause expression, if any.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
6794 
6795   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6796   using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6797   using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6798   using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6799   using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6800   using MapNonContiguousArrayTy =
6801       llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6802   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6803   using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6804   using MapData =
6805       std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
6806                  OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
6807                  bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
6808   using MapDataArrayTy = SmallVector<MapData, 4>;
6809 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  /// The Exprs, Mappers and DevicePtrDecls arrays run parallel to the arrays
  /// in the OpenMPIRBuilder::MapInfosTy base: entry I of each describes the
  /// same mapping.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      // Append the base-class arrays as well so all arrays stay in sync.
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
6827 
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mappings gathered for the struct before the range itself is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range.
    Address LB = Address::invalid();
    /// Whether an array section participates in the mapped range.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
6843 
6844 private:
  /// All the information gathered for one mappable expression: its component
  /// list, map type and modifiers, and whether a device pointer/address must
  /// be returned for it. (The old comment, "Kind that defines how a device
  /// pointer has to be returned", described only the ReturnDevicePointer bit.)
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if a device pointer must be returned (use_device_ptr).
    bool ReturnDevicePointer = false;
    /// True if the map was implicitly generated rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper to apply, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable reference expression, if any.
    const Expr *VarRef = nullptr;
    /// True if the entry comes from has_device_addr/is_device_ptr handling
    /// of an address rather than a pointer.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6871 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration the entry refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6884 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and the map clause that covers them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6915 
  /// Compute the number of bytes to be mapped for the expression \p E.
  ///
  /// Three expression shapes get special treatment:
  ///  - OMPArrayShapingExpr: pointee size times the product of all dimensions;
  ///  - ArraySectionExpr: size derived from the section length, or from the
  ///    base type size minus the lower-bound offset for `arr[lb:]` sections;
  ///  - anything else: static size of the (dereferenced) expression type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      // Multiply the pointee size by every dimension extent, converting each
      // extent to size_t first.
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when the lower-bound offset reaches or passes the end
      // of the base object, so the subtraction below cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
6990 
6991   /// Return the corresponding bits for a given map clause modifier. Add
6992   /// a flag marking the map as a pointer if requested. Add a flag marking the
6993   /// map as the first one of a series of maps that relate to the same map
6994   /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag,bool IsNonContiguous) const6995   OpenMPOffloadMappingFlags getMapTypeBits(
6996       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6997       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6998       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6999     OpenMPOffloadMappingFlags Bits =
7000         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7001                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7002     switch (MapType) {
7003     case OMPC_MAP_alloc:
7004     case OMPC_MAP_release:
7005       // alloc and release is the default behavior in the runtime library,  i.e.
7006       // if we don't pass any bits alloc/release that is what the runtime is
7007       // going to do. Therefore, we don't need to signal anything for these two
7008       // type modifiers.
7009       break;
7010     case OMPC_MAP_to:
7011       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7012       break;
7013     case OMPC_MAP_from:
7014       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7015       break;
7016     case OMPC_MAP_tofrom:
7017       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7018               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7019       break;
7020     case OMPC_MAP_delete:
7021       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7022       break;
7023     case OMPC_MAP_unknown:
7024       llvm_unreachable("Unexpected map type!");
7025     }
7026     if (AddPtrFlag)
7027       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7028     if (AddIsTargetParamFlag)
7029       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7030     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7031       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7032     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7033       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7034     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7035         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7036       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7037     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7038       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7039     if (IsNonContiguous)
7040       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7041     return Bits;
7042   }
7043 
7044   /// Return true if the provided expression is a final array section. A
7045   /// final array section, is one whose length can't be proved to be one.
isFinalArraySectionExpression(const Expr * E) const7046   bool isFinalArraySectionExpression(const Expr *E) const {
7047     const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7048 
7049     // It is not an array section and therefore not a unity-size one.
7050     if (!OASE)
7051       return false;
7052 
7053     // An array section with no colon always refer to a single element.
7054     if (OASE->getColonLocFirst().isInvalid())
7055       return false;
7056 
7057     const Expr *Length = OASE->getLength();
7058 
7059     // If we don't have a length we have to check if the array has size 1
7060     // for this dimension. Also, we should always expect a length if the
7061     // base type is pointer.
7062     if (!Length) {
7063       QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7064                              OASE->getBase()->IgnoreParenImpCasts())
7065                              .getCanonicalType();
7066       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7067         return ATy->getSExtSize() != 1;
7068       // If we don't have a constant dimension length, we have to consider
7069       // the current section as having any size, so it is not necessarily
7070       // unitary. If it happen to be unity size, that's user fault.
7071       return true;
7072     }
7073 
7074     // Check if the length evaluates to 1.
7075     Expr::EvalResult Result;
7076     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7077       return true; // Can have more that size 1.
7078 
7079     llvm::APSInt ConstLength = Result.Val.getInt();
7080     return ConstLength.getSExtValue() != 1;
7081   }
7082 
7083   /// Generate the base pointers, section pointers, sizes, map type bits, and
7084   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7085   /// map type, map or motion modifiers, and expression components.
7086   /// \a IsFirstComponent should be set to true if the provided set of
7087   /// components is the first associated with a capture.
  void generateInfoForComponentList(
7089       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7090       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7091       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7092       MapCombinedInfoTy &CombinedInfo,
7093       MapCombinedInfoTy &StructBaseCombinedInfo,
7094       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7095       bool IsImplicit, bool GenerateAllInfoForClauses,
7096       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7097       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7098       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7099           OverlappedElements = {},
7100       bool AreBothBasePtrAndPteeMapped = false) const {
7101     // The following summarizes what has to be generated for each map and the
7102     // types below. The generated information is expressed in this order:
7103     // base pointer, section pointer, size, flags
7104     // (to add to the ones that come from the map type and modifier).
7105     //
7106     // double d;
7107     // int i[100];
7108     // float *p;
7109     // int **a = &i;
7110     //
7111     // struct S1 {
7112     //   int i;
7113     //   float f[50];
7114     // }
7115     // struct S2 {
7116     //   int i;
7117     //   float f[50];
7118     //   S1 s;
7119     //   double *p;
7120     //   struct S2 *ps;
7121     //   int &ref;
7122     // }
7123     // S2 s;
7124     // S2 *ps;
7125     //
7126     // map(d)
7127     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7128     //
7129     // map(i)
7130     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7131     //
7132     // map(i[1:23])
7133     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7134     //
7135     // map(p)
7136     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7137     //
7138     // map(p[1:24])
7139     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7140     // in unified shared memory mode or for local pointers
7141     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7142     //
7143     // map((*a)[0:3])
7144     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7145     // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7146     //
7147     // map(**a)
7148     // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7149     // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7150     //
7151     // map(s)
7152     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7153     //
7154     // map(s.i)
7155     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7156     //
7157     // map(s.s.f)
7158     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7159     //
7160     // map(s.p)
7161     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7162     //
7163     // map(to: s.p[:22])
7164     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7165     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7166     // &(s.p), &(s.p[0]), 22*sizeof(double),
7167     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7168     // (*) alloc space for struct members, only this is a target parameter
7169     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7170     //      optimizes this entry out, same in the examples below)
7171     // (***) map the pointee (map: to)
7172     //
7173     // map(to: s.ref)
7174     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7175     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7176     // (*) alloc space for struct members, only this is a target parameter
7177     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7178     //      optimizes this entry out, same in the examples below)
7179     // (***) map the pointee (map: to)
7180     //
7181     // map(s.ps)
7182     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7183     //
7184     // map(from: s.ps->s.i)
7185     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7186     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7187     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7188     //
7189     // map(to: s.ps->ps)
7190     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7191     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7192     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7193     //
7194     // map(s.ps->ps->ps)
7195     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7196     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7197     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7198     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7199     //
7200     // map(to: s.ps->ps->s.f[:22])
7201     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7202     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7203     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7204     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7205     //
7206     // map(ps)
7207     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7208     //
7209     // map(ps->i)
7210     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7211     //
7212     // map(ps->s.f)
7213     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7214     //
7215     // map(from: ps->p)
7216     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7217     //
7218     // map(to: ps->p[:22])
7219     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7220     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7221     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7222     //
7223     // map(ps->ps)
7224     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7225     //
7226     // map(from: ps->ps->s.i)
7227     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7228     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7229     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7230     //
7231     // map(from: ps->ps->ps)
7232     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7233     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7234     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7235     //
7236     // map(ps->ps->ps->ps)
7237     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7238     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7239     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7240     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7241     //
7242     // map(to: ps->ps->ps->s.f[:22])
7243     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7244     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7245     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7247     //
7248     // map(to: s.f[:22]) map(from: s.p[:33])
7249     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7250     //     sizeof(double*) (**), TARGET_PARAM
7251     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7252     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7253     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254     // (*) allocate contiguous space needed to fit all mapped members even if
7255     //     we allocate space for members not mapped (in this example,
7256     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7257     //     them as well because they fall between &s.f[0] and &s.p)
7258     //
7259     // map(from: s.f[:22]) map(to: ps->p[:33])
7260     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7261     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7262     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7263     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7264     // (*) the struct this entry pertains to is the 2nd element in the list of
7265     //     arguments, hence MEMBER_OF(2)
7266     //
7267     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7268     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7269     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7270     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7271     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7272     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7273     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7274     // (*) the struct this entry pertains to is the 4th element in the list
7275     //     of arguments, hence MEMBER_OF(4)
7276     //
7277     // map(p, p[:100])
7278     // ===> map(p[:100])
7279     // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7280 
7281     // Track if the map information being generated is the first for a capture.
7282     bool IsCaptureFirstInfo = IsFirstComponentList;
7283     // When the variable is on a declare target link or in a to clause with
7284     // unified memory, a reference is needed to hold the host/device address
7285     // of the variable.
7286     bool RequiresReference = false;
7287 
7288     // Scan the components from the base to the complete expression.
7289     auto CI = Components.rbegin();
7290     auto CE = Components.rend();
7291     auto I = CI;
7292 
7293     // Track if the map information being generated is the first for a list of
7294     // components.
7295     bool IsExpressionFirstInfo = true;
7296     bool FirstPointerInComplexData = false;
7297     Address BP = Address::invalid();
7298     const Expr *AssocExpr = I->getAssociatedExpression();
7299     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7300     const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7301     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7302 
7303     if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7304       return;
7305     if (isa<MemberExpr>(AssocExpr)) {
7306       // The base is the 'this' pointer. The content of the pointer is going
7307       // to be the base of the field being mapped.
7308       BP = CGF.LoadCXXThisAddress();
7309     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7310                (OASE &&
7311                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7312       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7313     } else if (OAShE &&
7314                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7315       BP = Address(
7316           CGF.EmitScalarExpr(OAShE->getBase()),
7317           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7318           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7319     } else {
7320       // The base is the reference to the variable.
7321       // BP = &Var.
7322       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7323       if (const auto *VD =
7324               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7325         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7326                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7327           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7328               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7329                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7330                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7331             RequiresReference = true;
7332             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7333           }
7334         }
7335       }
7336 
7337       // If the variable is a pointer and is being dereferenced (i.e. is not
7338       // the last component), the base has to be the pointer itself, not its
7339       // reference. References are ignored for mapping purposes.
7340       QualType Ty =
7341           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7342       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7343         // No need to generate individual map information for the pointer, it
7344         // can be associated with the combined storage if shared memory mode is
7345         // active or the base declaration is not global variable.
7346         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7347         if (!AreBothBasePtrAndPteeMapped &&
7348             (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7349              !VD || VD->hasLocalStorage()))
7350           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7351         else
7352           FirstPointerInComplexData = true;
7353         ++I;
7354       }
7355     }
7356 
7357     // Track whether a component of the list should be marked as MEMBER_OF some
7358     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7359     // in a component list should be marked as MEMBER_OF, all subsequent entries
7360     // do not belong to the base struct. E.g.
7361     // struct S2 s;
7362     // s.ps->ps->ps->f[:]
7363     //   (1) (2) (3) (4)
7364     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7365     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7366     // is the pointee of ps(2) which is not member of struct s, so it should not
7367     // be marked as such (it is still PTR_AND_OBJ).
7368     // The variable is initialized to false so that PTR_AND_OBJ entries which
7369     // are not struct members are not considered (e.g. array of pointers to
7370     // data).
7371     bool ShouldBeMemberOf = false;
7372 
7373     // Variable keeping track of whether or not we have encountered a component
7374     // in the component list which is a member expression. Useful when we have a
7375     // pointer or a final array section, in which case it is the previous
7376     // component in the list which tells us whether we have a member expression.
7377     // E.g. X.f[:]
7378     // While processing the final array section "[:]" it is "f" which tells us
7379     // whether we are dealing with a member of a declared struct.
7380     const MemberExpr *EncounteredME = nullptr;
7381 
7382     // Track for the total number of dimension. Start from one for the dummy
7383     // dimension.
7384     uint64_t DimSize = 1;
7385 
7386     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7387     bool IsPrevMemberReference = false;
7388 
7389     bool IsPartialMapped =
7390         !PartialStruct.PreliminaryMapData.BasePointers.empty();
7391 
7392     // We need to check if we will be encountering any MEs. If we do not
7393     // encounter any ME expression it means we will be mapping the whole struct.
7394     // In that case we need to skip adding an entry for the struct to the
7395     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7396     // list only when generating all info for clauses.
7397     bool IsMappingWholeStruct = true;
7398     if (!GenerateAllInfoForClauses) {
7399       IsMappingWholeStruct = false;
7400     } else {
7401       for (auto TempI = I; TempI != CE; ++TempI) {
7402         const MemberExpr *PossibleME =
7403             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7404         if (PossibleME) {
7405           IsMappingWholeStruct = false;
7406           break;
7407         }
7408       }
7409     }
7410 
7411     for (; I != CE; ++I) {
7412       // If the current component is member of a struct (parent struct) mark it.
7413       if (!EncounteredME) {
7414         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7415         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7416         // as MEMBER_OF the parent struct.
7417         if (EncounteredME) {
7418           ShouldBeMemberOf = true;
7419           // Do not emit as complex pointer if this is actually not array-like
7420           // expression.
7421           if (FirstPointerInComplexData) {
7422             QualType Ty = std::prev(I)
7423                               ->getAssociatedDeclaration()
7424                               ->getType()
7425                               .getNonReferenceType();
7426             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7427             FirstPointerInComplexData = false;
7428           }
7429         }
7430       }
7431 
7432       auto Next = std::next(I);
7433 
7434       // We need to generate the addresses and sizes if this is the last
7435       // component, if the component is a pointer or if it is an array section
7436       // whose length can't be proved to be one. If this is a pointer, it
7437       // becomes the base address for the following components.
7438 
7439       // A final array section, is one whose length can't be proved to be one.
7440       // If the map item is non-contiguous then we don't treat any array section
7441       // as final array section.
7442       bool IsFinalArraySection =
7443           !IsNonContiguous &&
7444           isFinalArraySectionExpression(I->getAssociatedExpression());
7445 
7446       // If we have a declaration for the mapping use that, otherwise use
7447       // the base declaration of the map clause.
7448       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7449                                      ? I->getAssociatedDeclaration()
7450                                      : BaseDecl;
7451       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7452                                                : MapExpr;
7453 
7454       // Get information on whether the element is a pointer. Have to do a
7455       // special treatment for array sections given that they are built-in
7456       // types.
7457       const auto *OASE =
7458           dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7459       const auto *OAShE =
7460           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7461       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7462       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7463       bool IsPointer =
7464           OAShE ||
7465           (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7466                        .getCanonicalType()
7467                        ->isAnyPointerType()) ||
7468           I->getAssociatedExpression()->getType()->isAnyPointerType();
7469       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7470                                MapDecl &&
7471                                MapDecl->getType()->isLValueReferenceType();
7472       bool IsNonDerefPointer = IsPointer &&
7473                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7474                                !IsNonContiguous;
7475 
7476       if (OASE)
7477         ++DimSize;
7478 
7479       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7480           IsFinalArraySection) {
7481         // If this is not the last component, we expect the pointer to be
7482         // associated with an array expression or member expression.
7483         assert((Next == CE ||
7484                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7485                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7486                 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7487                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7488                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7489                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7490                "Unexpected expression");
7491 
7492         Address LB = Address::invalid();
7493         Address LowestElem = Address::invalid();
7494         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
7496           const Expr *BaseExpr = E->getBase();
7497           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7498           // scalar.
7499           LValue BaseLV;
7500           if (E->isArrow()) {
7501             LValueBaseInfo BaseInfo;
7502             TBAAAccessInfo TBAAInfo;
7503             Address Addr =
7504                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7505             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7506             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7507           } else {
7508             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7509           }
7510           return BaseLV;
7511         };
7512         if (OAShE) {
7513           LowestElem = LB =
7514               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7515                       CGF.ConvertTypeForMem(
7516                           OAShE->getBase()->getType()->getPointeeType()),
7517                       CGF.getContext().getTypeAlignInChars(
7518                           OAShE->getBase()->getType()));
7519         } else if (IsMemberReference) {
7520           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7521           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7522           LowestElem = CGF.EmitLValueForFieldInitialization(
7523                               BaseLVal, cast<FieldDecl>(MapDecl))
7524                            .getAddress();
7525           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7526                    .getAddress();
7527         } else {
7528           LowestElem = LB =
7529               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7530                   .getAddress();
7531         }
7532 
7533         // If this component is a pointer inside the base struct then we don't
7534         // need to create any entry for it - it will be combined with the object
7535         // it is pointing to into a single PTR_AND_OBJ entry.
7536         bool IsMemberPointerOrAddr =
7537             EncounteredME &&
7538             (((IsPointer || ForDeviceAddr) &&
7539               I->getAssociatedExpression() == EncounteredME) ||
7540              (IsPrevMemberReference && !IsPointer) ||
7541              (IsMemberReference && Next != CE &&
7542               !Next->getAssociatedExpression()->getType()->isPointerType()));
7543         if (!OverlappedElements.empty() && Next == CE) {
7544           // Handle base element with the info for overlapped elements.
7545           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7546           assert(!IsPointer &&
7547                  "Unexpected base element with the pointer type.");
7548           // Mark the whole struct as the struct that requires allocation on the
7549           // device.
7550           PartialStruct.LowestElem = {0, LowestElem};
7551           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7552               I->getAssociatedExpression()->getType());
7553           Address HB = CGF.Builder.CreateConstGEP(
7554               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7555                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7556               TypeSize.getQuantity() - 1);
7557           PartialStruct.HighestElem = {
7558               std::numeric_limits<decltype(
7559                   PartialStruct.HighestElem.first)>::max(),
7560               HB};
7561           PartialStruct.Base = BP;
7562           PartialStruct.LB = LB;
7563           assert(
7564               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7565               "Overlapped elements must be used only once for the variable.");
7566           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7567           // Emit data for non-overlapped data.
7568           OpenMPOffloadMappingFlags Flags =
7569               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7570               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7571                              /*AddPtrFlag=*/false,
7572                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7573           llvm::Value *Size = nullptr;
7574           // Do bitcopy of all non-overlapped structure elements.
7575           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7576                    Component : OverlappedElements) {
7577             Address ComponentLB = Address::invalid();
7578             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7579                  Component) {
7580               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7581                 const auto *FD = dyn_cast<FieldDecl>(VD);
7582                 if (FD && FD->getType()->isLValueReferenceType()) {
7583                   const auto *ME =
7584                       cast<MemberExpr>(MC.getAssociatedExpression());
7585                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7586                   ComponentLB =
7587                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7588                           .getAddress();
7589                 } else {
7590                   ComponentLB =
7591                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7592                           .getAddress();
7593                 }
7594                 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7595                 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7596                 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7597                                                  LBPtr);
7598                 break;
7599               }
7600             }
7601             assert(Size && "Failed to determine structure size");
7602             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7603             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7604             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7605             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7606             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7607             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7608                 Size, CGF.Int64Ty, /*isSigned=*/true));
7609             CombinedInfo.Types.push_back(Flags);
7610             CombinedInfo.Mappers.push_back(nullptr);
7611             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7612                                                                       : 1);
7613             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7614           }
7615           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7616           CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7617           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7618           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7619           CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7620           llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7621           Size = CGF.Builder.CreatePtrDiff(
7622               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7623               LBPtr);
7624           CombinedInfo.Sizes.push_back(
7625               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7626           CombinedInfo.Types.push_back(Flags);
7627           CombinedInfo.Mappers.push_back(nullptr);
7628           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629                                                                     : 1);
7630           break;
7631         }
7632         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7633         // Skip adding an entry in the CurInfo of this combined entry if the
7634         // whole struct is currently being mapped. The struct needs to be added
7635         // in the first position before any data internal to the struct is being
7636         // mapped.
7637         // Skip adding an entry in the CurInfo of this combined entry if the
7638         // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7639         if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7640             (Next == CE && MapType != OMPC_MAP_unknown)) {
7641           if (!IsMappingWholeStruct) {
7642             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7643             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7644             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7645             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7646             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7647             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7648                 Size, CGF.Int64Ty, /*isSigned=*/true));
7649             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7650                                                                       : 1);
7651           } else {
7652             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7653             StructBaseCombinedInfo.BasePointers.push_back(
7654                 BP.emitRawPointer(CGF));
7655             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7656             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7657             StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7658             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7659                 Size, CGF.Int64Ty, /*isSigned=*/true));
7660             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7661                 IsNonContiguous ? DimSize : 1);
7662           }
7663 
7664           // If Mapper is valid, the last component inherits the mapper.
7665           bool HasMapper = Mapper && Next == CE;
7666           if (!IsMappingWholeStruct)
7667             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7668           else
7669             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7670                                                                : nullptr);
7671 
7672           // We need to add a pointer flag for each map that comes from the
7673           // same expression except for the first one. We also need to signal
7674           // this map is the first one that relates with the current capture
7675           // (there is a set of entries for each capture).
7676           OpenMPOffloadMappingFlags Flags =
7677               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7678                              !IsExpressionFirstInfo || RequiresReference ||
7679                                  FirstPointerInComplexData || IsMemberReference,
7680                              AreBothBasePtrAndPteeMapped ||
7681                                  (IsCaptureFirstInfo && !RequiresReference),
7682                              IsNonContiguous);
7683 
7684           if (!IsExpressionFirstInfo || IsMemberReference) {
7685             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7686             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7687             if (IsPointer || (IsMemberReference && Next != CE))
7688               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7689                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7690                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7691                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7692                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7693 
7694             if (ShouldBeMemberOf) {
7695               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696               // should be later updated with the correct value of MEMBER_OF.
7697               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7698               // From now on, all subsequent PTR_AND_OBJ entries should not be
7699               // marked as MEMBER_OF.
7700               ShouldBeMemberOf = false;
7701             }
7702           }
7703 
7704           if (!IsMappingWholeStruct)
7705             CombinedInfo.Types.push_back(Flags);
7706           else
7707             StructBaseCombinedInfo.Types.push_back(Flags);
7708         }
7709 
7710         // If we have encountered a member expression so far, keep track of the
7711         // mapped member. If the parent is "*this", then the value declaration
7712         // is nullptr.
7713         if (EncounteredME) {
7714           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7715           unsigned FieldIndex = FD->getFieldIndex();
7716 
7717           // Update info about the lowest and highest elements for this struct
7718           if (!PartialStruct.Base.isValid()) {
7719             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7720             if (IsFinalArraySection && OASE) {
7721               Address HB =
7722                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7723                       .getAddress();
7724               PartialStruct.HighestElem = {FieldIndex, HB};
7725             } else {
7726               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7727             }
7728             PartialStruct.Base = BP;
7729             PartialStruct.LB = BP;
7730           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7731             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7732           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7733             if (IsFinalArraySection && OASE) {
7734               Address HB =
7735                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7736                       .getAddress();
7737               PartialStruct.HighestElem = {FieldIndex, HB};
7738             } else {
7739               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7740             }
7741           }
7742         }
7743 
7744         // Need to emit combined struct for array sections.
7745         if (IsFinalArraySection || IsNonContiguous)
7746           PartialStruct.IsArraySection = true;
7747 
7748         // If we have a final array section, we are done with this expression.
7749         if (IsFinalArraySection)
7750           break;
7751 
7752         // The pointer becomes the base for the next element.
7753         if (Next != CE)
7754           BP = IsMemberReference ? LowestElem : LB;
7755         if (!IsPartialMapped)
7756           IsExpressionFirstInfo = false;
7757         IsCaptureFirstInfo = false;
7758         FirstPointerInComplexData = false;
7759         IsPrevMemberReference = IsMemberReference;
7760       } else if (FirstPointerInComplexData) {
7761         QualType Ty = Components.rbegin()
7762                           ->getAssociatedDeclaration()
7763                           ->getType()
7764                           .getNonReferenceType();
7765         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7766         FirstPointerInComplexData = false;
7767       }
7768     }
7769     // If ran into the whole component - allocate the space for the whole
7770     // record.
7771     if (!EncounteredME)
7772       PartialStruct.HasCompleteRecord = true;
7773 
7774     if (!IsNonContiguous)
7775       return;
7776 
7777     const ASTContext &Context = CGF.getContext();
7778 
7779     // For supporting stride in array section, we need to initialize the first
7780     // dimension size as 1, first offset as 0, and first count as 1
7781     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7782     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7783     MapValuesArrayTy CurStrides;
7784     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7785     uint64_t ElementTypeSize;
7786 
7787     // Collect Size information for each dimension and get the element size as
7788     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7789     // should be [10, 10] and the first stride is 4 btyes.
7790     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7791          Components) {
7792       const Expr *AssocExpr = Component.getAssociatedExpression();
7793       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7794 
7795       if (!OASE)
7796         continue;
7797 
7798       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7799       auto *CAT = Context.getAsConstantArrayType(Ty);
7800       auto *VAT = Context.getAsVariableArrayType(Ty);
7801 
7802       // We need all the dimension size except for the last dimension.
7803       assert((VAT || CAT || &Component == &*Components.begin()) &&
7804              "Should be either ConstantArray or VariableArray if not the "
7805              "first Component");
7806 
7807       // Get element size if CurStrides is empty.
7808       if (CurStrides.empty()) {
7809         const Type *ElementType = nullptr;
7810         if (CAT)
7811           ElementType = CAT->getElementType().getTypePtr();
7812         else if (VAT)
7813           ElementType = VAT->getElementType().getTypePtr();
7814         else
7815           assert(&Component == &*Components.begin() &&
7816                  "Only expect pointer (non CAT or VAT) when this is the "
7817                  "first Component");
7818         // If ElementType is null, then it means the base is a pointer
7819         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7820         // for next iteration.
7821         if (ElementType) {
7822           // For the case that having pointer as base, we need to remove one
7823           // level of indirection.
7824           if (&Component != &*Components.begin())
7825             ElementType = ElementType->getPointeeOrArrayElementType();
7826           ElementTypeSize =
7827               Context.getTypeSizeInChars(ElementType).getQuantity();
7828           CurStrides.push_back(
7829               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7830         }
7831       }
7832       // Get dimension value except for the last dimension since we don't need
7833       // it.
7834       if (DimSizes.size() < Components.size() - 1) {
7835         if (CAT)
7836           DimSizes.push_back(
7837               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7838         else if (VAT)
7839           DimSizes.push_back(CGF.Builder.CreateIntCast(
7840               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7841               /*IsSigned=*/false));
7842       }
7843     }
7844 
7845     // Skip the dummy dimension since we have already have its information.
7846     auto *DI = DimSizes.begin() + 1;
7847     // Product of dimension.
7848     llvm::Value *DimProd =
7849         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7850 
7851     // Collect info for non-contiguous. Notice that offset, count, and stride
7852     // are only meaningful for array-section, so we insert a null for anything
7853     // other than array-section.
7854     // Also, the size of offset, count, and stride are not the same as
7855     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7856     // count, and stride are the same as the number of non-contiguous
7857     // declaration in target update to/from clause.
7858     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7859          Components) {
7860       const Expr *AssocExpr = Component.getAssociatedExpression();
7861 
7862       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7863         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7864             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7865             /*isSigned=*/false);
7866         CurOffsets.push_back(Offset);
7867         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7868         CurStrides.push_back(CurStrides.back());
7869         continue;
7870       }
7871 
7872       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7873 
7874       if (!OASE)
7875         continue;
7876 
7877       // Offset
7878       const Expr *OffsetExpr = OASE->getLowerBound();
7879       llvm::Value *Offset = nullptr;
7880       if (!OffsetExpr) {
7881         // If offset is absent, then we just set it to zero.
7882         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7883       } else {
7884         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7885                                            CGF.Int64Ty,
7886                                            /*isSigned=*/false);
7887       }
7888       CurOffsets.push_back(Offset);
7889 
7890       // Count
7891       const Expr *CountExpr = OASE->getLength();
7892       llvm::Value *Count = nullptr;
7893       if (!CountExpr) {
7894         // In Clang, once a high dimension is an array section, we construct all
7895         // the lower dimension as array section, however, for case like
7896         // arr[0:2][2], Clang construct the inner dimension as an array section
7897         // but it actually is not in an array section form according to spec.
7898         if (!OASE->getColonLocFirst().isValid() &&
7899             !OASE->getColonLocSecond().isValid()) {
7900           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7901         } else {
7902           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7903           // When the length is absent it defaults to ⌈(size −
7904           // lower-bound)/stride⌉, where size is the size of the array
7905           // dimension.
7906           const Expr *StrideExpr = OASE->getStride();
7907           llvm::Value *Stride =
7908               StrideExpr
7909                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7910                                               CGF.Int64Ty, /*isSigned=*/false)
7911                   : nullptr;
7912           if (Stride)
7913             Count = CGF.Builder.CreateUDiv(
7914                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7915           else
7916             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7917         }
7918       } else {
7919         Count = CGF.EmitScalarExpr(CountExpr);
7920       }
7921       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7922       CurCounts.push_back(Count);
7923 
7924       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7925       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7926       //              Offset      Count     Stride
7927       //    D0          0           1         4    (int)    <- dummy dimension
7928       //    D1          0           2         8    (2 * (1) * 4)
7929       //    D2          1           2         20   (1 * (1 * 5) * 4)
7930       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7931       const Expr *StrideExpr = OASE->getStride();
7932       llvm::Value *Stride =
7933           StrideExpr
7934               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7935                                           CGF.Int64Ty, /*isSigned=*/false)
7936               : nullptr;
7937       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7938       if (Stride)
7939         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7940       else
7941         CurStrides.push_back(DimProd);
7942       if (DI != DimSizes.end())
7943         ++DI;
7944     }
7945 
7946     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7947     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7948     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7949   }
7950 
7951   /// Return the adjusted map modifiers if the declaration a capture refers to
7952   /// appears in a first-private clause. This is expected to be used only with
7953   /// directives that start with 'target'.
7954   OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const7955   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7956     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7957 
7958     // A first private variable captured by reference will use only the
7959     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7960     // declaration is known as first-private in this handler.
7961     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7962       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7963         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7964                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7965       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7966              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7967     }
7968     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7969     if (I != LambdasMap.end())
7970       // for map(to: lambda): using user specified map type.
7971       return getMapTypeBits(
7972           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7973           /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7974           /*AddPtrFlag=*/false,
7975           /*AddIsTargetParamFlag=*/false,
7976           /*isNonContiguous=*/false);
7977     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7978            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7979   }
7980 
getPlainLayout(const CXXRecordDecl * RD,llvm::SmallVectorImpl<const FieldDecl * > & Layout,bool AsBase) const7981   void getPlainLayout(const CXXRecordDecl *RD,
7982                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7983                       bool AsBase) const {
7984     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7985 
7986     llvm::StructType *St =
7987         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7988 
7989     unsigned NumElements = St->getNumElements();
7990     llvm::SmallVector<
7991         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7992         RecordLayout(NumElements);
7993 
7994     // Fill bases.
7995     for (const auto &I : RD->bases()) {
7996       if (I.isVirtual())
7997         continue;
7998 
7999       QualType BaseTy = I.getType();
8000       const auto *Base = BaseTy->getAsCXXRecordDecl();
8001       // Ignore empty bases.
8002       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8003           CGF.getContext()
8004               .getASTRecordLayout(Base)
8005               .getNonVirtualSize()
8006               .isZero())
8007         continue;
8008 
8009       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8010       RecordLayout[FieldIndex] = Base;
8011     }
8012     // Fill in virtual bases.
8013     for (const auto &I : RD->vbases()) {
8014       QualType BaseTy = I.getType();
8015       // Ignore empty bases.
8016       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8017         continue;
8018 
8019       const auto *Base = BaseTy->getAsCXXRecordDecl();
8020       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8021       if (RecordLayout[FieldIndex])
8022         continue;
8023       RecordLayout[FieldIndex] = Base;
8024     }
8025     // Fill in all the fields.
8026     assert(!RD->isUnion() && "Unexpected union.");
8027     for (const auto *Field : RD->fields()) {
8028       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8029       // will fill in later.)
8030       if (!Field->isBitField() &&
8031           !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8032         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8033         RecordLayout[FieldIndex] = Field;
8034       }
8035     }
8036     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8037              &Data : RecordLayout) {
8038       if (Data.isNull())
8039         continue;
8040       if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8041         getPlainLayout(Base, Layout, /*AsBase=*/true);
8042       else
8043         Layout.push_back(cast<const FieldDecl *>(Data));
8044     }
8045   }
8046 
8047   /// Generate all the base pointers, section pointers, sizes, map types, and
8048   /// mappers for the extracted mappable expressions (all included in \a
8049   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8050   /// pair of the relevant declaration and index where it occurs is appended to
8051   /// the device pointers info array.
generateAllInfoForClauses(ArrayRef<const OMPClause * > Clauses,MapCombinedInfoTy & CombinedInfo,llvm::OpenMPIRBuilder & OMPBuilder,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkipVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ()) const8052   void generateAllInfoForClauses(
8053       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8054       llvm::OpenMPIRBuilder &OMPBuilder,
8055       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8056           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8057     // We have to process the component lists that relate with the same
8058     // declaration in a single chunk so that we can generate the map flags
8059     // correctly. Therefore, we organize all lists in a map.
8060     enum MapKind { Present, Allocs, Other, Total };
8061     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8062                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8063         Info;
8064 
8065     // Helper function to fill the information map for the different supported
8066     // clauses.
8067     auto &&InfoGen =
8068         [&Info, &SkipVarSet](
8069             const ValueDecl *D, MapKind Kind,
8070             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8071             OpenMPMapClauseKind MapType,
8072             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8073             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8074             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8075             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8076           if (SkipVarSet.contains(D))
8077             return;
8078           auto It = Info.try_emplace(D, Total).first;
8079           It->second[Kind].emplace_back(
8080               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8081               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8082         };
8083 
8084     for (const auto *Cl : Clauses) {
8085       const auto *C = dyn_cast<OMPMapClause>(Cl);
8086       if (!C)
8087         continue;
8088       MapKind Kind = Other;
8089       if (llvm::is_contained(C->getMapTypeModifiers(),
8090                              OMPC_MAP_MODIFIER_present))
8091         Kind = Present;
8092       else if (C->getMapType() == OMPC_MAP_alloc)
8093         Kind = Allocs;
8094       const auto *EI = C->getVarRefs().begin();
8095       for (const auto L : C->component_lists()) {
8096         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8097         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8098                 C->getMapTypeModifiers(), {},
8099                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8100                 E);
8101         ++EI;
8102       }
8103     }
8104     for (const auto *Cl : Clauses) {
8105       const auto *C = dyn_cast<OMPToClause>(Cl);
8106       if (!C)
8107         continue;
8108       MapKind Kind = Other;
8109       if (llvm::is_contained(C->getMotionModifiers(),
8110                              OMPC_MOTION_MODIFIER_present))
8111         Kind = Present;
8112       const auto *EI = C->getVarRefs().begin();
8113       for (const auto L : C->component_lists()) {
8114         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8115                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8116                 C->isImplicit(), std::get<2>(L), *EI);
8117         ++EI;
8118       }
8119     }
8120     for (const auto *Cl : Clauses) {
8121       const auto *C = dyn_cast<OMPFromClause>(Cl);
8122       if (!C)
8123         continue;
8124       MapKind Kind = Other;
8125       if (llvm::is_contained(C->getMotionModifiers(),
8126                              OMPC_MOTION_MODIFIER_present))
8127         Kind = Present;
8128       const auto *EI = C->getVarRefs().begin();
8129       for (const auto L : C->component_lists()) {
8130         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8131                 C->getMotionModifiers(),
8132                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8133                 *EI);
8134         ++EI;
8135       }
8136     }
8137 
8138     // Look at the use_device_ptr and use_device_addr clauses information and
8139     // mark the existing map entries as such. If there is no map information for
8140     // an entry in the use_device_ptr and use_device_addr list, we create one
8141     // with map type 'alloc' and zero size section. It is the user fault if that
8142     // was not mapped before. If there is no map information and the pointer is
8143     // a struct member, then we defer the emission of that entry until the whole
8144     // struct has been processed.
8145     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8146                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8147         DeferredInfo;
8148     MapCombinedInfoTy UseDeviceDataCombinedInfo;
8149 
8150     auto &&UseDeviceDataCombinedInfoGen =
8151         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8152                                      CodeGenFunction &CGF, bool IsDevAddr) {
8153           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8154           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8155           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8156           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8157               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8158           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8159           UseDeviceDataCombinedInfo.Sizes.push_back(
8160               llvm::Constant::getNullValue(CGF.Int64Ty));
8161           UseDeviceDataCombinedInfo.Types.push_back(
8162               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8163           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8164         };
8165 
8166     auto &&MapInfoGen =
8167         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8168          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8169                    OMPClauseMappableExprCommon::MappableExprComponentListRef
8170                        Components,
8171                    bool IsImplicit, bool IsDevAddr) {
8172           // We didn't find any match in our map information - generate a zero
8173           // size array section - if the pointer is a struct member we defer
8174           // this action until the whole struct has been processed.
8175           if (isa<MemberExpr>(IE)) {
8176             // Insert the pointer into Info to be processed by
8177             // generateInfoForComponentList. Because it is a member pointer
8178             // without a pointee, no entry will be generated for it, therefore
8179             // we need to generate one after the whole struct has been
8180             // processed. Nonetheless, generateInfoForComponentList must be
8181             // called to take the pointer into account for the calculation of
8182             // the range of the partial struct.
8183             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8184                     /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8185                     IsDevAddr);
8186             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8187           } else {
8188             llvm::Value *Ptr;
8189             if (IsDevAddr) {
8190               if (IE->isGLValue())
8191                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8192               else
8193                 Ptr = CGF.EmitScalarExpr(IE);
8194             } else {
8195               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8196             }
8197             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8198           }
8199         };
8200 
8201     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8202                                     const Expr *IE, bool IsDevAddr) -> bool {
8203       // We potentially have map information for this declaration already.
8204       // Look for the first set of components that refer to it. If found,
8205       // return true.
8206       // If the first component is a member expression, we have to look into
8207       // 'this', which maps to null in the map of map information. Otherwise
8208       // look directly for the information.
8209       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8210       if (It != Info.end()) {
8211         bool Found = false;
8212         for (auto &Data : It->second) {
8213           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8214             return MI.Components.back().getAssociatedDeclaration() == VD;
8215           });
8216           // If we found a map entry, signal that the pointer has to be
8217           // returned and move on to the next declaration. Exclude cases where
8218           // the base pointer is mapped as array subscript, array section or
8219           // array shaping. The base address is passed as a pointer to base in
8220           // this case and cannot be used as a base for use_device_ptr list
8221           // item.
8222           if (CI != Data.end()) {
8223             if (IsDevAddr) {
8224               CI->ForDeviceAddr = IsDevAddr;
8225               CI->ReturnDevicePointer = true;
8226               Found = true;
8227               break;
8228             } else {
8229               auto PrevCI = std::next(CI->Components.rbegin());
8230               const auto *VarD = dyn_cast<VarDecl>(VD);
8231               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8232                   isa<MemberExpr>(IE) ||
8233                   !VD->getType().getNonReferenceType()->isPointerType() ||
8234                   PrevCI == CI->Components.rend() ||
8235                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8236                   VarD->hasLocalStorage()) {
8237                 CI->ForDeviceAddr = IsDevAddr;
8238                 CI->ReturnDevicePointer = true;
8239                 Found = true;
8240                 break;
8241               }
8242             }
8243           }
8244         }
8245         return Found;
8246       }
8247       return false;
8248     };
8249 
8250     // Look at the use_device_ptr clause information and mark the existing map
8251     // entries as such. If there is no map information for an entry in the
8252     // use_device_ptr list, we create one with map type 'alloc' and zero size
8253     // section. It is the user fault if that was not mapped before. If there is
8254     // no map information and the pointer is a struct member, then we defer the
8255     // emission of that entry until the whole struct has been processed.
8256     for (const auto *Cl : Clauses) {
8257       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8258       if (!C)
8259         continue;
8260       for (const auto L : C->component_lists()) {
8261         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8262             std::get<1>(L);
8263         assert(!Components.empty() &&
8264                "Not expecting empty list of components!");
8265         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8266         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8267         const Expr *IE = Components.back().getAssociatedExpression();
8268         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8269           continue;
8270         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8271                    /*IsDevAddr=*/false);
8272       }
8273     }
8274 
8275     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8276     for (const auto *Cl : Clauses) {
8277       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8278       if (!C)
8279         continue;
8280       for (const auto L : C->component_lists()) {
8281         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8282             std::get<1>(L);
8283         assert(!std::get<1>(L).empty() &&
8284                "Not expecting empty list of components!");
8285         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8286         if (!Processed.insert(VD).second)
8287           continue;
8288         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8289         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8290         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8291           continue;
8292         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8293                    /*IsDevAddr=*/true);
8294       }
8295     }
8296 
8297     for (const auto &Data : Info) {
8298       StructRangeInfoTy PartialStruct;
8299       // Current struct information:
8300       MapCombinedInfoTy CurInfo;
8301       // Current struct base information:
8302       MapCombinedInfoTy StructBaseCurInfo;
8303       const Decl *D = Data.first;
8304       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8305       bool HasMapBasePtr = false;
8306       bool HasMapArraySec = false;
8307       if (VD && VD->getType()->isAnyPointerType()) {
8308         for (const auto &M : Data.second) {
8309           HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8310             return isa_and_present<DeclRefExpr>(L.VarRef);
8311           });
8312           HasMapArraySec = any_of(M, [](const MapInfo &L) {
8313             return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8314                 L.VarRef);
8315           });
8316           if (HasMapBasePtr && HasMapArraySec)
8317             break;
8318         }
8319       }
8320       for (const auto &M : Data.second) {
8321         for (const MapInfo &L : M) {
8322           assert(!L.Components.empty() &&
8323                  "Not expecting declaration with no component lists.");
8324 
8325           // Remember the current base pointer index.
8326           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8327           unsigned StructBasePointersIdx =
8328               StructBaseCurInfo.BasePointers.size();
8329           CurInfo.NonContigInfo.IsNonContiguous =
8330               L.Components.back().isNonContiguous();
8331           generateInfoForComponentList(
8332               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8333               CurInfo, StructBaseCurInfo, PartialStruct,
8334               /*IsFirstComponentList=*/false, L.IsImplicit,
8335               /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8336               L.VarRef, /*OverlappedElements*/ {},
8337               HasMapBasePtr && HasMapArraySec);
8338 
8339           // If this entry relates to a device pointer, set the relevant
8340           // declaration and add the 'return pointer' flag.
8341           if (L.ReturnDevicePointer) {
8342             // Check whether a value was added to either CurInfo or
8343             // StructBaseCurInfo and error if no value was added to either of
8344             // them:
8345             assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8346                     StructBasePointersIdx <
8347                         StructBaseCurInfo.BasePointers.size()) &&
8348                    "Unexpected number of mapped base pointers.");
8349 
8350             // Choose a base pointer index which is always valid:
8351             const ValueDecl *RelevantVD =
8352                 L.Components.back().getAssociatedDeclaration();
8353             assert(RelevantVD &&
8354                    "No relevant declaration related with device pointer??");
8355 
8356             // If StructBaseCurInfo has been updated this iteration then work on
8357             // the first new entry added to it i.e. make sure that when multiple
8358             // values are added to any of the lists, the first value added is
8359             // being modified by the assignments below (not the last value
8360             // added).
8361             if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8362               StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8363                   RelevantVD;
8364               StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8365                   L.ForDeviceAddr ? DeviceInfoTy::Address
8366                                   : DeviceInfoTy::Pointer;
8367               StructBaseCurInfo.Types[StructBasePointersIdx] |=
8368                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8369             } else {
8370               CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8371               CurInfo.DevicePointers[CurrentBasePointersIdx] =
8372                   L.ForDeviceAddr ? DeviceInfoTy::Address
8373                                   : DeviceInfoTy::Pointer;
8374               CurInfo.Types[CurrentBasePointersIdx] |=
8375                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8376             }
8377           }
8378         }
8379       }
8380 
8381       // Append any pending zero-length pointers which are struct members and
8382       // used with use_device_ptr or use_device_addr.
8383       auto CI = DeferredInfo.find(Data.first);
8384       if (CI != DeferredInfo.end()) {
8385         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8386           llvm::Value *BasePtr;
8387           llvm::Value *Ptr;
8388           if (L.ForDeviceAddr) {
8389             if (L.IE->isGLValue())
8390               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8391             else
8392               Ptr = this->CGF.EmitScalarExpr(L.IE);
8393             BasePtr = Ptr;
8394             // Entry is RETURN_PARAM. Also, set the placeholder value
8395             // MEMBER_OF=FFFF so that the entry is later updated with the
8396             // correct value of MEMBER_OF.
8397             CurInfo.Types.push_back(
8398                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8399                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8400           } else {
8401             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8402             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8403                                              L.IE->getExprLoc());
8404             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8405             // placeholder value MEMBER_OF=FFFF so that the entry is later
8406             // updated with the correct value of MEMBER_OF.
8407             CurInfo.Types.push_back(
8408                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8409                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8410                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8411           }
8412           CurInfo.Exprs.push_back(L.VD);
8413           CurInfo.BasePointers.emplace_back(BasePtr);
8414           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8415           CurInfo.DevicePointers.emplace_back(
8416               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8417           CurInfo.Pointers.push_back(Ptr);
8418           CurInfo.Sizes.push_back(
8419               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8420           CurInfo.Mappers.push_back(nullptr);
8421         }
8422       }
8423 
8424       // Unify entries in one list making sure the struct mapping precedes the
8425       // individual fields:
8426       MapCombinedInfoTy UnionCurInfo;
8427       UnionCurInfo.append(StructBaseCurInfo);
8428       UnionCurInfo.append(CurInfo);
8429 
8430       // If there is an entry in PartialStruct it means we have a struct with
8431       // individual members mapped. Emit an extra combined entry.
8432       if (PartialStruct.Base.isValid()) {
8433         UnionCurInfo.NonContigInfo.Dims.push_back(0);
8434         // Emit a combined entry:
8435         emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8436                           /*IsMapThis*/ !VD, OMPBuilder, VD);
8437       }
8438 
8439       // We need to append the results of this capture to what we already have.
8440       CombinedInfo.append(UnionCurInfo);
8441     }
8442     // Append data for use_device_ptr clauses.
8443     CombinedInfo.append(UseDeviceDataCombinedInfo);
8444   }
8445 
8446 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)8447   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8448       : CurDir(&Dir), CGF(CGF) {
8449     // Extract firstprivate clause information.
8450     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8451       for (const auto *D : C->varlist())
8452         FirstPrivateDecls.try_emplace(
8453             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8454     // Extract implicit firstprivates from uses_allocators clauses.
8455     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8456       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8457         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8458         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8459           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8460                                         /*Implicit=*/true);
8461         else if (const auto *VD = dyn_cast<VarDecl>(
8462                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8463                          ->getDecl()))
8464           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8465       }
8466     }
8467     // Extract device pointer clause information.
8468     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8469       for (auto L : C->component_lists())
8470         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8471     // Extract device addr clause information.
8472     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8473       for (auto L : C->component_lists())
8474         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8475     // Extract map information.
8476     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8477       if (C->getMapType() != OMPC_MAP_to)
8478         continue;
8479       for (auto L : C->component_lists()) {
8480         const ValueDecl *VD = std::get<0>(L);
8481         const auto *RD = VD ? VD->getType()
8482                                   .getCanonicalType()
8483                                   .getNonReferenceType()
8484                                   ->getAsCXXRecordDecl()
8485                             : nullptr;
8486         if (RD && RD->isLambda())
8487           LambdasMap.try_emplace(std::get<0>(L), C);
8488       }
8489     }
8490   }
8491 
8492   /// Constructor for the declare mapper directive.
  // No up-front clause extraction is done here; the mapper's clauses are
  // consumed later via generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8495 
8496   /// Generate code for the combined entry if we have a partially mapped struct
8497   /// and take care of the mapping flags of the arguments corresponding to
8498   /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         unsigned OffsetForMemberOfFlag = 0,
                         bool NotTargetParams = true) const {
    // A single entry that is not itself a member of another struct and not an
    // array section does not need a synthesized combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds collapse to the record's base address;
    // the size computed below then spans one element of that address's type
    // (i.e. the record itself).
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    // Only relevant when mapping 'this' from inside a member function of a
    // class that has base classes.
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8597 
8598   /// Generate all the base pointers, section pointers, sizes, map types, and
8599   /// mappers for the extracted mappable expressions (all included in \a
8600   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8601   /// pair of the relevant declaration and index where it occurs is appended to
8602   /// the device pointers info array.
generateAllInfo(MapCombinedInfoTy & CombinedInfo,llvm::OpenMPIRBuilder & OMPBuilder,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkipVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ()) const8603   void generateAllInfo(
8604       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8605       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8606           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8607     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8608            "Expect a executable directive");
8609     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8610     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8611                               SkipVarSet);
8612   }
8613 
8614   /// Generate all the base pointers, section pointers, sizes, map types, and
8615   /// mappers for the extracted map clauses of user-defined mapper (all included
8616   /// in \a CombinedInfo).
generateAllInfoForMapper(MapCombinedInfoTy & CombinedInfo,llvm::OpenMPIRBuilder & OMPBuilder) const8617   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8618                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8619     assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8620            "Expect a declare mapper directive");
8621     const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8622     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8623                               OMPBuilder);
8624   }
8625 
8626   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects get this treatment; anything else is ignored.
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Record capture-field address -> lambda address; used later by
      // adjustMemberOfForLambdaCaptures to fix up MEMBER_OF indices.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally shadows the parameter; it is the captured
      // variable, not the lambda object.
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      // Only by-reference captures and by-copy captures of pointers are
      // emitted here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        // Map the full (non-reference) type of the captured variable.
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: the pointer value itself is mapped with a
        // zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8702 
8703   /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(llvm::OpenMPIRBuilder & OMPBuilder,const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const8704   void adjustMemberOfForLambdaCaptures(
8705       llvm::OpenMPIRBuilder &OMPBuilder,
8706       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8707       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8708       MapFlagsArrayTy &Types) const {
8709     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8710       // Set correct member_of idx for all implicit lambda captures.
8711       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8712                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8713                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8714                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8715         continue;
8716       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8717       assert(BasePtr && "Unable to find base lambda address.");
8718       int TgtIdx = -1;
8719       for (unsigned J = I; J > 0; --J) {
8720         unsigned Idx = J - 1;
8721         if (Pointers[Idx] != BasePtr)
8722           continue;
8723         TgtIdx = Idx;
8724         break;
8725       }
8726       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8727       // All other current entries will be MEMBER_OF the combined entry
8728       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8729       // 0xFFFF in the MEMBER_OF field).
8730       OpenMPOffloadMappingFlags MemberOfFlag =
8731           OMPBuilder.getMemberOfFlag(TgtIdx);
8732       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8733     }
8734   }
8735 
8736   /// For a capture that has an associated clause, generate the base pointers,
8737   /// section pointers, sizes, map types, and mappers (all included in
8738   /// \a CurCaptureVarInfo).
generateInfoForCaptureFromClauseInfo(const CapturedStmt::Capture * Cap,llvm::Value * Arg,MapCombinedInfoTy & CurCaptureVarInfo,llvm::OpenMPIRBuilder & OMPBuilder,unsigned OffsetForMemberOfFlag) const8739   void generateInfoForCaptureFromClauseInfo(
8740       const CapturedStmt::Capture *Cap, llvm::Value *Arg,
8741       MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8742       unsigned OffsetForMemberOfFlag) const {
8743     assert(!Cap->capturesVariableArrayType() &&
8744            "Not expecting to generate map info for a variable array type!");
8745 
8746     // We need to know when we generating information for the first component
8747     const ValueDecl *VD = Cap->capturesThis()
8748                               ? nullptr
8749                               : Cap->getCapturedVar()->getCanonicalDecl();
8750 
8751     // for map(to: lambda): skip here, processing it in
8752     // generateDefaultMapInfo
8753     if (LambdasMap.count(VD))
8754       return;
8755 
8756     // If this declaration appears in a is_device_ptr clause we just have to
8757     // pass the pointer by value. If it is a reference to a declaration, we just
8758     // pass its value.
8759     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8760       CurCaptureVarInfo.Exprs.push_back(VD);
8761       CurCaptureVarInfo.BasePointers.emplace_back(Arg);
8762       CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
8763       CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8764       CurCaptureVarInfo.Pointers.push_back(Arg);
8765       CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8766           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8767           /*isSigned=*/true));
8768       CurCaptureVarInfo.Types.push_back(
8769           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8770           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8771       CurCaptureVarInfo.Mappers.push_back(nullptr);
8772       return;
8773     }
8774 
8775     MapDataArrayTy DeclComponentLists;
8776     // For member fields list in is_device_ptr, store it in
8777     // DeclComponentLists for generating components info.
8778     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8779     auto It = DevPointersMap.find(VD);
8780     if (It != DevPointersMap.end())
8781       for (const auto &MCL : It->second)
8782         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8783                                         /*IsImpicit = */ true, nullptr,
8784                                         nullptr);
8785     auto I = HasDevAddrsMap.find(VD);
8786     if (I != HasDevAddrsMap.end())
8787       for (const auto &MCL : I->second)
8788         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8789                                         /*IsImpicit = */ true, nullptr,
8790                                         nullptr);
8791     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8792            "Expect a executable directive");
8793     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8794     bool HasMapBasePtr = false;
8795     bool HasMapArraySec = false;
8796     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8797       const auto *EI = C->getVarRefs().begin();
8798       for (const auto L : C->decl_component_lists(VD)) {
8799         const ValueDecl *VDecl, *Mapper;
8800         // The Expression is not correct if the mapping is implicit
8801         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8802         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8803         std::tie(VDecl, Components, Mapper) = L;
8804         assert(VDecl == VD && "We got information for the wrong declaration??");
8805         assert(!Components.empty() &&
8806                "Not expecting declaration with no component lists.");
8807         if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8808           HasMapBasePtr = true;
8809         if (VD && E && VD->getType()->isAnyPointerType() &&
8810             (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8811           HasMapArraySec = true;
8812         DeclComponentLists.emplace_back(Components, C->getMapType(),
8813                                         C->getMapTypeModifiers(),
8814                                         C->isImplicit(), Mapper, E);
8815         ++EI;
8816       }
8817     }
8818     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8819                                              const MapData &RHS) {
8820       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8821       OpenMPMapClauseKind MapType = std::get<1>(RHS);
8822       bool HasPresent =
8823           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8824       bool HasAllocs = MapType == OMPC_MAP_alloc;
8825       MapModifiers = std::get<2>(RHS);
8826       MapType = std::get<1>(LHS);
8827       bool HasPresentR =
8828           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8829       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8830       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8831     });
8832 
8833     auto GenerateInfoForComponentLists =
8834         [&](ArrayRef<MapData> DeclComponentLists,
8835             bool IsEligibleForTargetParamFlag) {
8836           MapCombinedInfoTy CurInfoForComponentLists;
8837           StructRangeInfoTy PartialStruct;
8838 
8839           if (DeclComponentLists.empty())
8840             return;
8841 
8842           generateInfoForCaptureFromComponentLists(
8843               VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
8844               IsEligibleForTargetParamFlag,
8845               /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
8846 
8847           // If there is an entry in PartialStruct it means we have a
8848           // struct with individual members mapped. Emit an extra combined
8849           // entry.
8850           if (PartialStruct.Base.isValid()) {
8851             CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
8852             emitCombinedEntry(
8853                 CurCaptureVarInfo, CurInfoForComponentLists.Types,
8854                 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
8855                 OffsetForMemberOfFlag,
8856                 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
8857           }
8858 
8859           // Return if we didn't add any entries.
8860           if (CurInfoForComponentLists.BasePointers.empty())
8861             return;
8862 
8863           CurCaptureVarInfo.append(CurInfoForComponentLists);
8864         };
8865 
8866     GenerateInfoForComponentLists(DeclComponentLists,
8867                                   /*IsEligibleForTargetParamFlag=*/true);
8868   }
8869 
8870   /// Generate the base pointers, section pointers, sizes, map types, and
8871   /// mappers associated to \a DeclComponentLists for a given capture
8872   /// \a VD (all included in \a CurComponentListInfo).
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      bool IsListEligibleForTargetParamFlag,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // Find overlapping elements (including the offset from the base element).
    // Maps each "base" component list to the component lists that extend it
    // (i.e. share a common prefix of components).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise comparison of all component lists; the inner loop only visits
    // lists after L, so each unordered pair is examined exactly once.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE: this std::tie deliberately clobbers MapType/MapModifiers/etc.
        // from L above; only Components vs. Components1 are compared below.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the back (innermost component first) while the
        // components refer to the same kind of expression and the same
        // variable/field.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points to the first component of the longer list that the
          // shorter list does not have.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list is the base; the longer one overlaps it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the flattened field order of the underlying record so the
    // comparator below can order fields from different (base) classes.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying element type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order the overlapped component lists of each base by declaration order
    // of the distinguishing fields (shorter prefix first on ties).
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of both lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field; order by field index within the
            // same record, otherwise by position in the flattened layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // Only the first component list generated may carry the TARGET_PARAM
    // flag; AddTargetParamFlag is cleared after each call.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
          /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
      AddTargetParamFlag = false;
    }
  }
9043 
9044   /// Generate the default map information for a given capture \a CI,
9045   /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // NOTE: CombinedInfo holds parallel arrays; each branch below must push
    // exactly one entry into each of the arrays it touches so the indices
    // stay aligned.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Map the object `this` points to as a whole (size of the pointee).
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause can override whether this capture is implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // Captured by reference: map the referenced storage.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // For a firstprivate pointer, map what the pointer points to, so
        // dereference the reference to get the pointer value itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9125 };
9126 } // anonymous namespace
9127 
9128 // Try to extract the base declaration from a `this->x` expression if possible.
getDeclFromThisExpr(const Expr * E)9129 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9130   if (!E)
9131     return nullptr;
9132 
9133   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9134     if (const MemberExpr *ME =
9135             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9136       return ME->getMemberDecl();
9137   return nullptr;
9138 }
9139 
9140 /// Emit a string constant containing the names of the values mapped to the
9141 /// offloading runtime library.
9142 static llvm::Constant *
emitMappingInformation(CodeGenFunction & CGF,llvm::OpenMPIRBuilder & OMPBuilder,MappableExprsHandler::MappingExprInfo & MapExprs)9143 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9144                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9145 
9146   uint32_t SrcLocStrSize;
9147   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9148     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9149 
9150   SourceLocation Loc;
9151   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9152     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9153       Loc = VD->getLocation();
9154     else
9155       Loc = MapExprs.getMapExpr()->getExprLoc();
9156   } else {
9157     Loc = MapExprs.getMapDecl()->getLocation();
9158   }
9159 
9160   std::string ExprName;
9161   if (MapExprs.getMapExpr()) {
9162     PrintingPolicy P(CGF.getContext().getLangOpts());
9163     llvm::raw_string_ostream OS(ExprName);
9164     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9165   } else {
9166     ExprName = MapExprs.getMapDecl()->getNameAsString();
9167   }
9168 
9169   std::string FileName;
9170   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9171   if (auto *DbgInfo = CGF.getDebugInfo())
9172     FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9173   else
9174     FileName = PLoc.getFilename();
9175   return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9176                                          PLoc.getColumn(), SrcLocStrSize);
9177 }
9178 /// Emit the arrays used to pass the captures and map information to the
9179 /// offloading runtime library. If there is no map or capture information,
9180 /// return nullptr by reference.
emitOffloadingArraysAndArgs(CodeGenFunction & CGF,MappableExprsHandler::MapCombinedInfoTy & CombinedInfo,CGOpenMPRuntime::TargetDataInfo & Info,llvm::OpenMPIRBuilder & OMPBuilder,bool IsNonContiguous=false,bool ForEndCall=false)9181 static void emitOffloadingArraysAndArgs(
9182     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9183     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9184     bool IsNonContiguous = false, bool ForEndCall = false) {
9185   CodeGenModule &CGM = CGF.CGM;
9186 
9187   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9188   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9189                          CGF.AllocaInsertPt->getIterator());
9190   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9191                           CGF.Builder.GetInsertPoint());
9192 
9193   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9194     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9195       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9196     }
9197   };
9198 
9199   auto CustomMapperCB = [&](unsigned int I) {
9200     llvm::Function *MFunc = nullptr;
9201     if (CombinedInfo.Mappers[I]) {
9202       Info.HasMapper = true;
9203       MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9204           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9205     }
9206     return MFunc;
9207   };
9208   cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9209       AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9210       IsNonContiguous, ForEndCall, DeviceAddrCB));
9211 }
9212 
9213 /// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look at the single statement nested inside D's innermost captured
  // statement, ignoring containers.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target' + 'teams': look one level deeper for a distribute
        // directive nested inside the teams region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // A directly nested distribute directive is the one we want.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute directive.
      return nullptr;
    // All remaining directive kinds either already include 'distribute' or
    // are not target directives, so reaching here indicates a caller bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9324 
9325 /// Emit the user-defined mapper function. The code generation follows the
9326 /// pattern in the example below.
9327 /// \code
9328 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9329 ///                                           void *base, void *begin,
9330 ///                                           int64_t size, int64_t type,
9331 ///                                           void *name = nullptr) {
9332 ///   // Allocate space for an array section first or add a base/begin for
9333 ///   // pointer dereference.
9334 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9335 ///       !maptype.IsDelete)
9336 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9337 ///                                 size*sizeof(Ty), clearToFromMember(type));
9338 ///   // Map members.
9339 ///   for (unsigned i = 0; i < size; i++) {
9340 ///     // For each component specified by this mapper:
9341 ///     for (auto c : begin[i]->all_components) {
9342 ///       if (c.hasMapper())
9343 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9344 ///                       c.arg_type, c.arg_name);
9345 ///       else
9346 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9347 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9348 ///                                     c.arg_name);
9349 ///     }
9350 ///   }
9351 ///   // Delete the array section.
9352 ///   if (size > 1 && maptype.IsDelete)
9353 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9354 ///                                 size*sizeof(Ty), clearToFromMember(type));
9355 /// }
9356 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each declare-mapper is emitted at most once; UDMMap caches the result.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The variable declared in the mapper that names the current element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback used by OMPBuilder to generate the per-element map info inside
  // the mapper's loop. PtrPHI is the current element; BeginArg the array
  // begin. Returns CombinedInfo, which CustomMapperCB below also reads.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(MapperVarDecl, PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // When debug info is enabled, also emit name strings for each entry.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };

  // Resolves the nested user-defined mapper function for entry I, if any.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Build the mapper's name from the mangled mapped type and mapper id:
  // .omp_mapper.<type_name>.<mapper_id>.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
      PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
  // Cache the result and, when emitted from within a function, remember the
  // association so the mapper can be found per-function later.
  UDMMap.try_emplace(D, NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
9425 
getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl * D)9426 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9427     const OMPDeclareMapperDecl *D) {
9428   auto I = UDMMap.find(D);
9429   if (I != UDMMap.end())
9430     return I->second;
9431   emitUserDefinedMapper(D);
9432   return UDMMap.lookup(D);
9433 }
9434 
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9435 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9436     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9437     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9438                                      const OMPLoopDirective &D)>
9439         SizeEmitter) {
9440   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9441   const OMPExecutableDirective *TD = &D;
9442   // Get nested teams distribute kind directive, if any. For now, treat
9443   // 'target_teams_loop' as if it's really a target_teams_distribute.
9444   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9445       Kind != OMPD_target_teams_loop)
9446     TD = getNestedDistributeDirective(CGM.getContext(), D);
9447   if (!TD)
9448     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9449 
9450   const auto *LD = cast<OMPLoopDirective>(TD);
9451   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9452     return NumIterations;
9453   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9454 }
9455 
9456 static void
emitTargetCallFallback(CGOpenMPRuntime * OMPRuntime,llvm::Function * OutlinedFn,const OMPExecutableDirective & D,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars,bool RequiresOuterTask,const CapturedStmt & CS,bool OffloadingMandatory,CodeGenFunction & CGF)9457 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9458                        const OMPExecutableDirective &D,
9459                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9460                        bool RequiresOuterTask, const CapturedStmt &CS,
9461                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9462   if (OffloadingMandatory) {
9463     CGF.Builder.CreateUnreachable();
9464   } else {
9465     if (RequiresOuterTask) {
9466       CapturedVars.clear();
9467       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9468     }
9469     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9470                                          CapturedVars);
9471   }
9472 }
9473 
emitDeviceID(llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,CodeGenFunction & CGF)9474 static llvm::Value *emitDeviceID(
9475     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9476     CodeGenFunction &CGF) {
9477   // Emit device ID if any.
9478   llvm::Value *DeviceID;
9479   if (Device.getPointer()) {
9480     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9481             Device.getInt() == OMPC_DEVICE_device_num) &&
9482            "Expected device_num modifier.");
9483     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9484     DeviceID =
9485         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9486   } else {
9487     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9488   }
9489   return DeviceID;
9490 }
9491 
emitDynCGGroupMem(const OMPExecutableDirective & D,CodeGenFunction & CGF)9492 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9493                                       CodeGenFunction &CGF) {
9494   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9495 
9496   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9497     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9498     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9499         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9500     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9501                                              /*isSigned=*/false);
9502   }
9503   return DynCGroupMem;
9504 }
/// Collect map information for every capture of a target region.
///
/// Walks the captures of \p CS in lockstep with the captured record's fields
/// and the already-emitted capture values in \p CapturedVars, appending the
/// per-capture map entries to \p CombinedInfo. Declarations that received map
/// entries here are recorded in \p MappedVarSet so later clause processing
/// can skip them.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  // Pointer pairs gathered while processing lambda captures; consumed at the
  // end to fix up MEMBER_OF flags across the combined arrays.
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // RI tracks the captured record's fields and CV the emitted capture values;
  // both advance in lockstep with the capture iterator CI.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          CI, *CV, CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // Record the mapped declaration; a captured 'this' is represented by a
      // null entry in the set.
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);

      // No clause produced entries for this capture: fall back to the default
      // mapping rules.
      if (CurInfo.BasePointers.empty())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
}
9574 static void
genMapInfo(MappableExprsHandler & MEHandler,CodeGenFunction & CGF,MappableExprsHandler::MapCombinedInfoTy & CombinedInfo,llvm::OpenMPIRBuilder & OMPBuilder,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkippedVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ())9575 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9576            MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9577            llvm::OpenMPIRBuilder &OMPBuilder,
9578            const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9579                llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9580 
9581   CodeGenModule &CGM = CGF.CGM;
9582   // Map any list items in a map clause that were not captures because they
9583   // weren't referenced within the construct.
9584   MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9585 
9586   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9587     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9588   };
9589   if (CGM.getCodeGenOpts().getDebugInfo() !=
9590       llvm::codegenoptions::NoDebugInfo) {
9591     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9592     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9593                     FillInfoMap);
9594   }
9595 }
9596 
genMapInfo(const OMPExecutableDirective & D,CodeGenFunction & CGF,const CapturedStmt & CS,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars,llvm::OpenMPIRBuilder & OMPBuilder,MappableExprsHandler::MapCombinedInfoTy & CombinedInfo)9597 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9598                        const CapturedStmt &CS,
9599                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600                        llvm::OpenMPIRBuilder &OMPBuilder,
9601                        MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9602   // Get mappable expression information.
9603   MappableExprsHandler MEHandler(D, CGF);
9604   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9605 
9606   genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9607                         MappedVarSet, CombinedInfo);
9608   genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9609 }
9610 
9611 template <typename ClauseTy>
9612 static void
emitClauseForBareTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::SmallVectorImpl<llvm::Value * > & Values)9613 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9614                                  const OMPExecutableDirective &D,
9615                                  llvm::SmallVectorImpl<llvm::Value *> &Values) {
9616   const auto *C = D.getSingleClause<ClauseTy>();
9617   assert(!C->varlist_empty() &&
9618          "ompx_bare requires explicit num_teams and thread_limit");
9619   CodeGenFunction::RunCleanupsScope Scope(CGF);
9620   for (auto *E : C->varlist()) {
9621     llvm::Value *V = CGF.EmitScalarExpr(E);
9622     Values.push_back(
9623         CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9624   }
9625 }
9626 
/// Emit the offloading arrays for target directive \p D and the call that
/// launches its kernel through OpenMPIRBuilder::emitKernelLaunch, falling
/// back to the host outlined function when reverse offloading is requested or
/// the launch fails. \p MapTypesArray and \p MapNamesArray are outputs that
/// receive the generated map-types/map-names values.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the generated offloading arrays through InputInfo (and the two
  // by-reference outputs) so they are visible to the region generator below
  // and to a surrounding task, if one is required.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Body of the target call; captures by reference so that a task-based
  // emission sees the final InputInfo/array values.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Host fallback invoked by emitKernelLaunch if the device launch fails.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // ompx_bare kernels take their launch bounds directly from the explicit
    // num_teams/thread_limit clauses; otherwise compute them from D.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             NumThreads);
    } else {
      NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(AfterIP);
  };

  // Wrap the launch in a task when depend/nowait/etc. require one.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9737 
9738 static void
emitTargetCallElse(CGOpenMPRuntime * OMPRuntime,llvm::Function * OutlinedFn,const OMPExecutableDirective & D,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars,bool RequiresOuterTask,const CapturedStmt & CS,bool OffloadingMandatory,CodeGenFunction & CGF)9739 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9740                    const OMPExecutableDirective &D,
9741                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9742                    bool RequiresOuterTask, const CapturedStmt &CS,
9743                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9744 
9745   // Notify that the host version must be executed.
9746   auto &&ElseGen =
9747       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9748        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9749         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9750                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9751       };
9752 
9753   if (RequiresOuterTask) {
9754     CodeGenFunction::OMPTargetDataInfo InputInfo;
9755     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9756   } else {
9757     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9758   }
9759 }
9760 
/// Emit the host-side code for a target directive: generate the captured
/// variables, then either guard a kernel launch with the 'if' clause
/// (launch vs. host fallback) or, when no target function ID exists (e.g. no
/// target triples specified), execute the host version unconditionally.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // An outer task is needed for depend/nowait/in_reduction, and (OpenMP 5.1+)
  // for thread_limit on directives where it applies to the generated task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables before branching on the 'if' clause so
  // both arms see the same values.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by emitTargetCallKernelLaunch; captured by reference below.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // 'then' arm: attempt the device kernel launch.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // 'else' arm: execute the host fallback.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9829 
/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device function for each region that is registered as an entry point.
/// \p ParentName is the mangled name of the enclosing host function; it seeds
/// the kernel name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the per-directive device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // should have been filtered out above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: only its raw associated statement can
  // contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9985 
isAssumedToBeNotEmitted(const ValueDecl * VD,bool IsDevice)9986 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9987   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9988       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9989   if (!DevTy)
9990     return false;
9991   // Do not emit device_type(nohost) functions for the host.
9992   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9993     return true;
9994   // Do not emit device_type(host) functions for the device.
9995   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9996     return true;
9997   return false;
9998 }
9999 
emitTargetFunctions(GlobalDecl GD)10000 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10001   // If emitting code for the host, we do not process FD here. Instead we do
10002   // the normal code generation.
10003   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10004     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10005       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10006                                   CGM.getLangOpts().OpenMPIsTargetDevice))
10007         return true;
10008     return false;
10009   }
10010 
10011   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10012   // Try to detect target regions in the function.
10013   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10014     StringRef Name = CGM.getMangledName(GD);
10015     scanForTargetRegionsFunctions(FD->getBody(), Name);
10016     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10017                                 CGM.getLangOpts().OpenMPIsTargetDevice))
10018       return true;
10019   }
10020 
10021   // Do not to emit function if it is not marked as declare target.
10022   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10023          AlreadyEmittedTargetDecls.count(VD) == 0;
10024 }
10025 
/// Decide whether the global variable behind \p GD should be skipped by the
/// regular emission path. Returns true when it is excluded by device_type,
/// or (on the device) when the OpenMP offloading machinery defers or handles
/// its emission.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, global variables are always emitted normally.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  // 'link' variables, and 'to'/'enter' variables under unified shared memory,
  // are deferred and emitted by the offloading machinery instead.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10064 
/// Register the declare-target variable \p VD (emitted at \p Addr) with the
/// OpenMPIRBuilder's offloading infrastructure so an offload entry and any
/// required reference globals are generated.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and this is not a
  // device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  // Lazy accessors: the builder only materializes the address/linkage when it
  // actually needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  // Keep every generated reference alive for the offloading runtime.
  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}
10111 
emitTargetGlobal(GlobalDecl GD)10112 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10113   if (isa<FunctionDecl>(GD.getDecl()) ||
10114       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10115     return emitTargetFunctions(GD);
10116 
10117   return emitTargetGlobalVariable(GD);
10118 }
10119 
emitDeferredTargetDecls() const10120 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10121   for (const VarDecl *VD : DeferredGlobalVariables) {
10122     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10123         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10124     if (!Res)
10125       continue;
10126     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10127          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10128         !HasRequiresUnifiedSharedMemory) {
10129       CGM.EmitGlobal(VD);
10130     } else {
10131       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10132               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10133                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10134                HasRequiresUnifiedSharedMemory)) &&
10135              "Expected link clause or to clause with unified memory.");
10136       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10137     }
10138   }
10139 }
10140 
// Hook for target-specific lambda-capture fixups; this base implementation
// only sanity-checks that it is called inside a target execution directive.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10146 
processRequiresDirective(const OMPRequiresDecl * D)10147 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10148   for (const OMPClause *Clause : D->clauselists()) {
10149     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10150       HasRequiresUnifiedSharedMemory = true;
10151       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10152     } else if (const auto *AC =
10153                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10154       switch (AC->getAtomicDefaultMemOrderKind()) {
10155       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10156         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10157         break;
10158       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10159         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10160         break;
10161       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10162         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10163         break;
10164       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10165         break;
10166       }
10167     }
10168   }
10169 }
10170 
// Returns the default atomic ordering, as set by a 'requires
// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10174 
// Returns true if \p VD carries an OMPAllocateDeclAttr with a predefined
// allocator, setting \p AS to the address space the variable should live in.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  // Unreachable for valid enum values; kept to silence -Wreturn-type.
  return false;
}
10199 
// True once a 'requires unified_shared_memory' clause has been processed.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10203 
DisableAutoDeclareTargetRAII(CodeGenModule & CGM)10204 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10205     CodeGenModule &CGM)
10206     : CGM(CGM) {
10207   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10208     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10209     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10210   }
10211 }
10212 
~DisableAutoDeclareTargetRAII()10213 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10214   if (CGM.getLangOpts().OpenMPIsTargetDevice)
10215     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10216 }
10217 
markAsGlobalTarget(GlobalDecl GD)10218 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10219   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10220     return true;
10221 
10222   const auto *D = cast<FunctionDecl>(GD.getDecl());
10223   // Do not to emit function if it is marked as declare target as it was already
10224   // emitted.
10225   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10226     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10227       if (auto *F = dyn_cast_or_null<llvm::Function>(
10228               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10229         return !F->isDeclaration();
10230       return false;
10231     }
10232     return true;
10233   }
10234 
10235   return !AlreadyEmittedTargetDecls.insert(D).second;
10236 }
10237 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)10238 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10239                                     const OMPExecutableDirective &D,
10240                                     SourceLocation Loc,
10241                                     llvm::Function *OutlinedFn,
10242                                     ArrayRef<llvm::Value *> CapturedVars) {
10243   if (!CGF.HaveInsertPoint())
10244     return;
10245 
10246   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10247   CodeGenFunction::RunCleanupsScope Scope(CGF);
10248 
10249   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10250   llvm::Value *Args[] = {
10251       RTLoc,
10252       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10253       OutlinedFn};
10254   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10255   RealArgs.append(std::begin(Args), std::end(Args));
10256   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10257 
10258   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10259       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10260   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10261 }
10262 
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)10263 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10264                                          const Expr *NumTeams,
10265                                          const Expr *ThreadLimit,
10266                                          SourceLocation Loc) {
10267   if (!CGF.HaveInsertPoint())
10268     return;
10269 
10270   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10271 
10272   llvm::Value *NumTeamsVal =
10273       NumTeams
10274           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10275                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10276           : CGF.Builder.getInt32(0);
10277 
10278   llvm::Value *ThreadLimitVal =
10279       ThreadLimit
10280           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10281                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10282           : CGF.Builder.getInt32(0);
10283 
10284   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10285   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10286                                      ThreadLimitVal};
10287   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10288                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10289                       PushNumTeamsArgs);
10290 }
10291 
emitThreadLimitClause(CodeGenFunction & CGF,const Expr * ThreadLimit,SourceLocation Loc)10292 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10293                                             const Expr *ThreadLimit,
10294                                             SourceLocation Loc) {
10295   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10296   llvm::Value *ThreadLimitVal =
10297       ThreadLimit
10298           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10299                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10300           : CGF.Builder.getInt32(0);
10301 
10302   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10303   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10304                                     ThreadLimitVal};
10305   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10306                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10307                       ThreadLimitArgs);
10308 }
10309 
// Emits '#pragma omp target data' by delegating to
// OMPBuilder.createTargetData, wiring up Clang-side callbacks for map-info
// generation, body emission, and use_device_ptr/addr value capture.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any; otherwise use the "undefined device" sentinel.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback: compute map-clause info at the given insert point. When debug
  // info is enabled, also fill Names with per-component mapping strings.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Callback: emit the region body for one of the builder's three phases.
  // Which phase actually emits depends on whether any use_device_ptr/addr
  // captures were recorded in Info.CaptureDeviceAddrMap.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      // Privatized body: only when device-address captures exist.
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      // No captures: emit the body once, with privatization turned off.
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Callback: record the device-side value produced for component I so the
  // region body can look it up by its declaration.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Callback: materialize the user-defined mapper function for component I,
  // if a 'declare mapper' was attached; null otherwise.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(OMPBuilder.createTargetData(
          OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
  CGF.Builder.restoreIP(AfterIP);
}
10413 
// Emits the standalone data-movement directives ('target enter data',
// 'target exit data', 'target update') as a single call to the matching
// __tgt_target_data_* runtime entry point, optionally wrapped in a task when
// depend/nowait clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // InputInfo and the two arrays are filled in later by TargetThenGen below,
  // before this lambda actually runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the "undefined device" sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected by the assert above; the
    // exhaustive list keeps -Wswitch useful when new kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    // The nowait variants take four additional trailing arguments; pass
    // zero/null values for them here.
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    // depend/nowait clauses require wrapping the runtime call in a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    // Publish the generated arrays to the captures of ThenGen above.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the data movement only runs when the condition holds;
  // the else branch is intentionally empty.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10591 
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// The mangling letters below refer to mangleVectorParameters().
enum ParamKindTy {
  Linear,     // mangled 'l'
  LinearRef,  // mangled 'R'
  LinearUVal, // mangled 'U'
  LinearVal,  // mangled 'L'
  Uniform,    // mangled 'u'
  Vector,     // mangled 'v' (the default)
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; Vector unless a clause says otherwise.
  ParamKindTy Kind = Vector;
  // Linear step or clause argument; mangled after the kind letter.
  llvm::APSInt StrideOrArg;
  // Alignment value; mangled as 'a<N>' when non-zero.
  llvm::APSInt Alignment;
  // True when the stride is variable ('s<StrideOrArg>' mangling).
  bool HasVarStride = false;
};
} // namespace
10610 
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10611 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10612                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10613   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10614   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10615   // of that clause. The VLEN value must be power of 2.
10616   // In other case the notion of the function`s "characteristic data type" (CDT)
10617   // is used to compute the vector length.
10618   // CDT is defined in the following order:
10619   //   a) For non-void function, the CDT is the return type.
10620   //   b) If the function has any non-uniform, non-linear parameters, then the
10621   //   CDT is the type of the first such parameter.
10622   //   c) If the CDT determined by a) or b) above is struct, union, or class
10623   //   type which is pass-by-value (except for the type that maps to the
10624   //   built-in complex data type), the characteristic data type is int.
10625   //   d) If none of the above three cases is applicable, the CDT is int.
10626   // The VLEN is then determined based on the CDT and the size of vector
10627   // register of that ISA for which current vector version is generated. The
10628   // VLEN is computed using the formula below:
10629   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10630   // where vector register size specified in section 3.2.1 Registers and the
10631   // Stack Frame of original AMD64 ABI document.
10632   QualType RetType = FD->getReturnType();
10633   if (RetType.isNull())
10634     return 0;
10635   ASTContext &C = FD->getASTContext();
10636   QualType CDT;
10637   if (!RetType.isNull() && !RetType->isVoidType()) {
10638     CDT = RetType;
10639   } else {
10640     unsigned Offset = 0;
10641     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10642       if (ParamAttrs[Offset].Kind == Vector)
10643         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10644       ++Offset;
10645     }
10646     if (CDT.isNull()) {
10647       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10648         if (ParamAttrs[I + Offset].Kind == Vector) {
10649           CDT = FD->getParamDecl(I)->getType();
10650           break;
10651         }
10652       }
10653     }
10654   }
10655   if (CDT.isNull())
10656     CDT = C.IntTy;
10657   CDT = CDT->getCanonicalTypeUnqualified();
10658   if (CDT->isRecordType() || CDT->isUnionType())
10659     CDT = C.IntTy;
10660   return C.getTypeSize(CDT);
10661 }
10662 
10663 /// Mangle the parameter part of the vector function name according to
10664 /// their OpenMP classification. The mangling function is defined in
10665 /// section 4.5 of the AAVFABI(2021Q1).
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)10666 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10667   SmallString<256> Buffer;
10668   llvm::raw_svector_ostream Out(Buffer);
10669   for (const auto &ParamAttr : ParamAttrs) {
10670     switch (ParamAttr.Kind) {
10671     case Linear:
10672       Out << 'l';
10673       break;
10674     case LinearRef:
10675       Out << 'R';
10676       break;
10677     case LinearUVal:
10678       Out << 'U';
10679       break;
10680     case LinearVal:
10681       Out << 'L';
10682       break;
10683     case Uniform:
10684       Out << 'u';
10685       break;
10686     case Vector:
10687       Out << 'v';
10688       break;
10689     }
10690     if (ParamAttr.HasVarStride)
10691       Out << "s" << ParamAttr.StrideOrArg;
10692     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10693              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10694       // Don't print the step value if it is not present or if it is
10695       // equal to 1.
10696       if (ParamAttr.StrideOrArg < 0)
10697         Out << 'n' << -ParamAttr.StrideOrArg;
10698       else if (ParamAttr.StrideOrArg != 1)
10699         Out << ParamAttr.StrideOrArg;
10700     }
10701 
10702     if (!!ParamAttr.Alignment)
10703       Out << 'a' << ParamAttr.Alignment;
10704   }
10705 
10706   return std::string(Out.str());
10707 }
10708 
10709 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)10710 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10711                            const llvm::APSInt &VLENVal,
10712                            ArrayRef<ParamAttrTy> ParamAttrs,
10713                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10714   struct ISADataTy {
10715     char ISA;
10716     unsigned VecRegSize;
10717   };
10718   ISADataTy ISAData[] = {
10719       {
10720           'b', 128
10721       }, // SSE
10722       {
10723           'c', 256
10724       }, // AVX
10725       {
10726           'd', 256
10727       }, // AVX2
10728       {
10729           'e', 512
10730       }, // AVX512
10731   };
10732   llvm::SmallVector<char, 2> Masked;
10733   switch (State) {
10734   case OMPDeclareSimdDeclAttr::BS_Undefined:
10735     Masked.push_back('N');
10736     Masked.push_back('M');
10737     break;
10738   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10739     Masked.push_back('N');
10740     break;
10741   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10742     Masked.push_back('M');
10743     break;
10744   }
10745   for (char Mask : Masked) {
10746     for (const ISADataTy &Data : ISAData) {
10747       SmallString<256> Buffer;
10748       llvm::raw_svector_ostream Out(Buffer);
10749       Out << "_ZGV" << Data.ISA << Mask;
10750       if (!VLENVal) {
10751         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10752         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10753         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10754       } else {
10755         Out << VLENVal;
10756       }
10757       Out << mangleVectorParameters(ParamAttrs);
10758       Out << '_' << Fn->getName();
10759       Fn->addFnAttr(Out.str());
10760     }
10761   }
10762 }
10763 
// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10769 
10770 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
getAArch64MTV(QualType QT,ParamKindTy Kind)10771 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10772   QT = QT.getCanonicalType();
10773 
10774   if (QT->isVoidType())
10775     return false;
10776 
10777   if (Kind == ParamKindTy::Uniform)
10778     return false;
10779 
10780   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10781     return false;
10782 
10783   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10784       !QT->isReferenceType())
10785     return false;
10786 
10787   return true;
10788 }
10789 
10790 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)10791 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10792   QT = QT.getCanonicalType();
10793   unsigned Size = C.getTypeSize(QT);
10794 
10795   // Only scalars and complex within 16 bytes wide set PVB to true.
10796   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10797     return false;
10798 
10799   if (QT->isFloatingType())
10800     return true;
10801 
10802   if (QT->isIntegerType())
10803     return true;
10804 
10805   if (QT->isPointerType())
10806     return true;
10807 
10808   // TODO: Add support for complex types (section 3.1.2, item 2).
10809 
10810   return false;
10811 }
10812 
10813 /// Computes the lane size (LS) of a return type or of an input parameter,
10814 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10815 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)10816 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10817   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10818     QualType PTy = QT.getCanonicalType()->getPointeeType();
10819     if (getAArch64PBV(PTy, C))
10820       return C.getTypeSize(PTy);
10821   }
10822   if (getAArch64PBV(QT, C))
10823     return C.getTypeSize(QT);
10824 
10825   return C.getTypeSize(C.getUIntPtrType());
10826 }
10827 
10828 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10829 // signature of the scalar function, as defined in 3.2.2 of the
10830 // AAVFABI.
10831 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10832 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10833   QualType RetType = FD->getReturnType().getCanonicalType();
10834 
10835   ASTContext &C = FD->getASTContext();
10836 
10837   bool OutputBecomesInput = false;
10838 
10839   llvm::SmallVector<unsigned, 8> Sizes;
10840   if (!RetType->isVoidType()) {
10841     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10842     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10843       OutputBecomesInput = true;
10844   }
10845   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10846     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10847     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10848   }
10849 
10850   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10851   // The LS of a function parameter / return value can only be a power
10852   // of 2, starting from 8 bits, up to 128.
10853   assert(llvm::all_of(Sizes,
10854                       [](unsigned Size) {
10855                         return Size == 8 || Size == 16 || Size == 32 ||
10856                                Size == 64 || Size == 128;
10857                       }) &&
10858          "Invalid size");
10859 
10860   return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
10861                          OutputBecomesInput);
10862 }
10863 
10864 // Function used to add the attribute. The parameter `VLEN` is
10865 // templated to allow the use of "x" when targeting scalable functions
10866 // for SVE.
10867 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10868 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10869                                  char ISA, StringRef ParSeq,
10870                                  StringRef MangledName, bool OutputBecomesInput,
10871                                  llvm::Function *Fn) {
10872   SmallString<256> Buffer;
10873   llvm::raw_svector_ostream Out(Buffer);
10874   Out << Prefix << ISA << LMask << VLEN;
10875   if (OutputBecomesInput)
10876     Out << "v";
10877   Out << ParSeq << "_" << MangledName;
10878   Fn->addFnAttr(Out.str());
10879 }
10880 
10881 // Helper function to generate the Advanced SIMD names depending on
10882 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10883 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10884                                       StringRef Prefix, char ISA,
10885                                       StringRef ParSeq, StringRef MangledName,
10886                                       bool OutputBecomesInput,
10887                                       llvm::Function *Fn) {
10888   switch (NDS) {
10889   case 8:
10890     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10891                          OutputBecomesInput, Fn);
10892     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10893                          OutputBecomesInput, Fn);
10894     break;
10895   case 16:
10896     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10897                          OutputBecomesInput, Fn);
10898     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10899                          OutputBecomesInput, Fn);
10900     break;
10901   case 32:
10902     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10903                          OutputBecomesInput, Fn);
10904     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10905                          OutputBecomesInput, Fn);
10906     break;
10907   case 64:
10908   case 128:
10909     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10910                          OutputBecomesInput, Fn);
10911     break;
10912   default:
10913     llvm_unreachable("Scalar type is too wide.");
10914   }
10915 }
10916 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates any user-provided `simdlen` (emitting warnings and bailing out
/// on invalid values), then attaches one mangled vector-variant name per
/// applicable (mask, vector-length) combination to \p Fn.
/// \param UserVLEN value of the `simdlen` clause, or 0 if absent.
/// \param State the `[not]inbranch` clause state, controlling masked vs.
///        unmasked variants.
/// \param ISA 's' for SVE, 'n' for Advanced SIMD (NEON).
/// NOTE(review): VecRegSize is not referenced anywhere in this function.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked ("N") and masked ("M").
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable ("x") masked variant.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The vector lengths are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11025 
/// Attaches `declare simd` vector-variant attributes to \p Fn for every
/// OMPDeclareSimdDeclAttr found on any redeclaration of \p FD. Collects the
/// per-parameter kinds (uniform/linear/aligned), alignments and strides from
/// the attribute clauses and dispatches to the x86 or AArch64 mangling
/// routine depending on the target triple.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Walk the whole redeclaration chain, newest first; each redeclaration
  // may carry its own declare-simd attributes.
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    // For a C++ method, position 0 is reserved for `this` (keyed by FD).
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // `alignments` runs parallel to `aligneds`: a null entry means the
      // clause had no explicit alignment expression.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Use the user-specified alignment, or fall back to the default
        // SIMD alignment for the parameter's type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // `steps` and `modifiers` run parallel to `linears`.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: if it names another parameter, record that
            // parameter's position as a variable stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the simdlen clause (if present) to a constant.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific mangler; targets other than x86 and
      // AArch64 get no vector-variant attributes.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11178 
namespace {
/// Cleanup action for doacross support.
/// Captures a runtime function and its two arguments by value so the call
/// can be emitted when the enclosing cleanup scope is exited (see
/// emitDoacrossInit, which pushes this cleanup with __kmpc_doacross_fini).
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments the finalization runtime call takes.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  // Arguments stored by value: the cleanup may outlive the scope that
  // built them.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emit the stored runtime call; a missing insert point means the block
  // is unreachable, so nothing is emitted.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11203 
/// Emits initialization code for doacross loops: builds the array of
/// `kmp_dim` descriptors (one per loop dimension), calls
/// __kmpc_doacross_init, and pushes a cleanup that calls
/// __kmpc_doacross_fini at scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  // Zero-initialize all dims; `lo` stays 0 since only `up` and `st` are
  // stored below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini call as a cleanup so it runs
  // on both normal and EH exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
11274 
/// Shared implementation for doacross `ordered` constructs: materializes the
/// loop counter values of clause \p C into a temporary kmp_int64 array and
/// calls __kmpc_doacross_post (for a source clause) or __kmpc_doacross_wait
/// (for a sink clause).
/// \tparam T OMPDependClause or OMPDoacrossClause (dispatched via
///         OMPDoacrossKind<T>).
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's counter expression, converted to kmp_int64, into the
  // temporary array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // Source clauses post the iteration; sink clauses wait on it.
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
11310 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)11311 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11312                                           const OMPDependClause *C) {
11313   return EmitDoacrossOrdered<OMPDependClause>(
11314       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11315       getThreadID(CGF, C->getBeginLoc()));
11316 }
11317 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDoacrossClause * C)11318 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11319                                           const OMPDoacrossClause *C) {
11320   return EmitDoacrossOrdered<OMPDoacrossClause>(
11321       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11322       getThreadID(CGF, C->getBeginLoc()));
11323 }
11324 
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const11325 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11326                                llvm::FunctionCallee Callee,
11327                                ArrayRef<llvm::Value *> Args) const {
11328   assert(Loc.isValid() && "Outlined function call location must be valid.");
11329   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11330 
11331   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11332     if (Fn->doesNotThrow()) {
11333       CGF.EmitNounwindRuntimeCall(Fn, Args);
11334       return;
11335     }
11336   }
11337   CGF.EmitRuntimeCall(Callee, Args);
11338 }
11339 
/// Emit a call to an outlined function; forwards to emitCall so the debug
/// location and nounwind handling are applied uniformly.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11345 
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)11346 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11347   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11348     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11349       HasEmittedDeclareTargetRegion = true;
11350 }
11351 
/// Default mapping from a native parameter to its address: simply the local
/// variable's address; TargetParam is unused here.
/// NOTE(review): presumably overridden by device-specific runtimes when the
/// native and target parameters differ — confirm against subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11357 
11358 /// Return allocator value from expression, or return a null allocator (default
11359 /// when no allocator specified).
getAllocatorVal(CodeGenFunction & CGF,const Expr * Allocator)11360 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11361                                     const Expr *Allocator) {
11362   llvm::Value *AllocVal;
11363   if (Allocator) {
11364     AllocVal = CGF.EmitScalarExpr(Allocator);
11365     // According to the standard, the original allocator type is a enum
11366     // (integer). Convert to pointer type, if required.
11367     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11368                                         CGF.getContext().VoidPtrTy,
11369                                         Allocator->getExprLoc());
11370   } else {
11371     // If no allocator specified, it defaults to the null allocator.
11372     AllocVal = llvm::Constant::getNullValue(
11373         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11374   }
11375   return AllocVal;
11376 }
11377 
11378 /// Return the alignment from an allocate directive if present.
getAlignmentValue(CodeGenModule & CGM,const VarDecl * VD)11379 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11380   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11381 
11382   if (!AllocateAlignment)
11383     return nullptr;
11384 
11385   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11386 }
11387 
getAddressOfLocalVariable(CodeGenFunction & CGF,const VarDecl * VD)11388 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11389                                                    const VarDecl *VD) {
11390   if (!VD)
11391     return Address::invalid();
11392   Address UntiedAddr = Address::invalid();
11393   Address UntiedRealAddr = Address::invalid();
11394   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11395   if (It != FunctionToUntiedTaskStackMap.end()) {
11396     const UntiedLocalVarsAddressesMap &UntiedData =
11397         UntiedLocalVarsStack[It->second];
11398     auto I = UntiedData.find(VD);
11399     if (I != UntiedData.end()) {
11400       UntiedAddr = I->second.first;
11401       UntiedRealAddr = I->second.second;
11402     }
11403   }
11404   const VarDecl *CVD = VD->getCanonicalDecl();
11405   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11406     // Use the default allocation.
11407     if (!isAllocatableDecl(VD))
11408       return UntiedAddr;
11409     llvm::Value *Size;
11410     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11411     if (CVD->getType()->isVariablyModifiedType()) {
11412       Size = CGF.getTypeSize(CVD->getType());
11413       // Align the size: ((size + align - 1) / align) * align
11414       Size = CGF.Builder.CreateNUWAdd(
11415           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11416       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11417       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11418     } else {
11419       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11420       Size = CGM.getSize(Sz.alignTo(Align));
11421     }
11422     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11423     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11424     const Expr *Allocator = AA->getAllocator();
11425     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11426     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11427     SmallVector<llvm::Value *, 4> Args;
11428     Args.push_back(ThreadID);
11429     if (Alignment)
11430       Args.push_back(Alignment);
11431     Args.push_back(Size);
11432     Args.push_back(AllocVal);
11433     llvm::omp::RuntimeFunction FnID =
11434         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11435     llvm::Value *Addr = CGF.EmitRuntimeCall(
11436         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11437         getName({CVD->getName(), ".void.addr"}));
11438     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11439         CGM.getModule(), OMPRTL___kmpc_free);
11440     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11441     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11442         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11443     if (UntiedAddr.isValid())
11444       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11445 
11446     // Cleanup action for allocate support.
11447     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11448       llvm::FunctionCallee RTLFn;
11449       SourceLocation::UIntTy LocEncoding;
11450       Address Addr;
11451       const Expr *AllocExpr;
11452 
11453     public:
11454       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11455                            SourceLocation::UIntTy LocEncoding, Address Addr,
11456                            const Expr *AllocExpr)
11457           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11458             AllocExpr(AllocExpr) {}
11459       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11460         if (!CGF.HaveInsertPoint())
11461           return;
11462         llvm::Value *Args[3];
11463         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11464             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11465         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11466             Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11467         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11468         Args[2] = AllocVal;
11469         CGF.EmitRuntimeCall(RTLFn, Args);
11470       }
11471     };
11472     Address VDAddr =
11473         UntiedRealAddr.isValid()
11474             ? UntiedRealAddr
11475             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11476     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11477         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11478         VDAddr, Allocator);
11479     if (UntiedRealAddr.isValid())
11480       if (auto *Region =
11481               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11482         Region->emitUntiedSwitch(CGF);
11483     return VDAddr;
11484   }
11485   return UntiedAddr;
11486 }
11487 
isLocalVarInUntiedTask(CodeGenFunction & CGF,const VarDecl * VD) const11488 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11489                                              const VarDecl *VD) const {
11490   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11491   if (It == FunctionToUntiedTaskStackMap.end())
11492     return false;
11493   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11494 }
11495 
NontemporalDeclsRAII(CodeGenModule & CGM,const OMPLoopDirective & S)11496 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11497     CodeGenModule &CGM, const OMPLoopDirective &S)
11498     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11499   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11500   if (!NeedToPush)
11501     return;
11502   NontemporalDeclsSet &DS =
11503       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11504   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11505     for (const Stmt *Ref : C->private_refs()) {
11506       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11507       const ValueDecl *VD;
11508       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11509         VD = DRE->getDecl();
11510       } else {
11511         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11512         assert((ME->isImplicitCXXThis() ||
11513                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11514                "Expected member of current class.");
11515         VD = ME->getMemberDecl();
11516       }
11517       DS.insert(VD);
11518     }
11519   }
11520 }
11521 
~NontemporalDeclsRAII()11522 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11523   if (!NeedToPush)
11524     return;
11525   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11526 }
11527 
UntiedTaskLocalDeclsRAII(CodeGenFunction & CGF,const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,std::pair<Address,Address>> & LocalVars)11528 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11529     CodeGenFunction &CGF,
11530     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11531                           std::pair<Address, Address>> &LocalVars)
11532     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11533   if (!NeedToPush)
11534     return;
11535   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11536       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11537   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11538 }
11539 
~UntiedTaskLocalDeclsRAII()11540 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11541   if (!NeedToPush)
11542     return;
11543   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11544 }
11545 
isNontemporalDecl(const ValueDecl * VD) const11546 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11547   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11548 
11549   return llvm::any_of(
11550       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11551       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11552 }
11553 
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const11554 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11555     const OMPExecutableDirective &S,
11556     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11557     const {
11558   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11559   // Vars in target/task regions must be excluded completely.
11560   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11561       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11562     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11563     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11564     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11565     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11566       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11567         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11568     }
11569   }
11570   // Exclude vars in private clauses.
11571   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11572     for (const Expr *Ref : C->varlist()) {
11573       if (!Ref->getType()->isScalarType())
11574         continue;
11575       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11576       if (!DRE)
11577         continue;
11578       NeedToCheckForLPCs.insert(DRE->getDecl());
11579     }
11580   }
11581   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11582     for (const Expr *Ref : C->varlist()) {
11583       if (!Ref->getType()->isScalarType())
11584         continue;
11585       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11586       if (!DRE)
11587         continue;
11588       NeedToCheckForLPCs.insert(DRE->getDecl());
11589     }
11590   }
11591   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11592     for (const Expr *Ref : C->varlist()) {
11593       if (!Ref->getType()->isScalarType())
11594         continue;
11595       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11596       if (!DRE)
11597         continue;
11598       NeedToCheckForLPCs.insert(DRE->getDecl());
11599     }
11600   }
11601   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11602     for (const Expr *Ref : C->varlist()) {
11603       if (!Ref->getType()->isScalarType())
11604         continue;
11605       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11606       if (!DRE)
11607         continue;
11608       NeedToCheckForLPCs.insert(DRE->getDecl());
11609     }
11610   }
11611   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11612     for (const Expr *Ref : C->varlist()) {
11613       if (!Ref->getType()->isScalarType())
11614         continue;
11615       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11616       if (!DRE)
11617         continue;
11618       NeedToCheckForLPCs.insert(DRE->getDecl());
11619     }
11620   }
11621   for (const Decl *VD : NeedToCheckForLPCs) {
11622     for (const LastprivateConditionalData &Data :
11623          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11624       if (Data.DeclToUniqueName.count(VD) > 0) {
11625         if (!Data.Disabled)
11626           NeedToAddForLPCsAsDisabled.insert(VD);
11627         break;
11628       }
11629     }
11630   }
11631 }
11632 
// Pushes a new lastprivate conditional region onto the runtime's stack when
// OpenMP >= 5.0 and the directive has at least one
// 'lastprivate(conditional: ...)' clause; otherwise this RAII is a no-op.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique global name; the
    // helper globals emitted later ("<name>" and "<name>.iv") use it.
    for (const Expr *Ref : C->varlist()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Record the loop iteration variable and the owning function; both are
  // consulted when a write to a tracked variable is found.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11664 
LastprivateConditionalRAII(CodeGenFunction & CGF,const OMPExecutableDirective & S)11665 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11666     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11667     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11668   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11669   if (CGM.getLangOpts().OpenMP < 50)
11670     return;
11671   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11672   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11673   if (!NeedToAddForLPCsAsDisabled.empty()) {
11674     Action = ActionToDo::DisableLastprivateConditional;
11675     LastprivateConditionalData &Data =
11676         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11677     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11678       Data.DeclToUniqueName.try_emplace(VD);
11679     Data.Fn = CGF.CurFn;
11680     Data.Disabled = true;
11681   }
11682 }
11683 
// Factory for the "disable" form of the RAII: only suppresses inner
// lastprivate conditional analysis for \p S, never pushes an enabled list.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11689 
~LastprivateConditionalRAII()11690 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11691   if (CGM.getLangOpts().OpenMP < 50)
11692     return;
11693   if (Action == ActionToDo::DisableLastprivateConditional) {
11694     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11695            "Expected list of disabled private vars.");
11696     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11697   }
11698   if (Action == ActionToDo::PushAsLastprivateConditional) {
11699     assert(
11700         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11701         "Expected list of lastprivate conditional vars.");
11702     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11703   }
11704 }
11705 
// Creates (or reuses) a per-function implicit record { priv_value; char Fired }
// for the lastprivate conditional variable \p VD, zero-initializes the Fired
// flag, and returns the address of the value field to serve as the private
// copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time this variable is seen in the current function: build the
    // record type, allocate a temporary for it, and cache the result. (The
    // record name keeps its historical "lasprivate" spelling; it is an
    // internal name only.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached entry: unpack (type, value field, fired field, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv_a.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}
11738 
11739 namespace {
11740 /// Checks if the lastprivate conditional variable is referenced in LHS.
11741 class LastprivateConditionalRefChecker final
11742     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11743   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11744   const Expr *FoundE = nullptr;
11745   const Decl *FoundD = nullptr;
11746   StringRef UniqueDeclName;
11747   LValue IVLVal;
11748   llvm::Function *FoundFn = nullptr;
11749   SourceLocation Loc;
11750 
11751 public:
VisitDeclRefExpr(const DeclRefExpr * E)11752   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11753     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11754          llvm::reverse(LPM)) {
11755       auto It = D.DeclToUniqueName.find(E->getDecl());
11756       if (It == D.DeclToUniqueName.end())
11757         continue;
11758       if (D.Disabled)
11759         return false;
11760       FoundE = E;
11761       FoundD = E->getDecl()->getCanonicalDecl();
11762       UniqueDeclName = It->second;
11763       IVLVal = D.IVLVal;
11764       FoundFn = D.Fn;
11765       break;
11766     }
11767     return FoundE == E;
11768   }
VisitMemberExpr(const MemberExpr * E)11769   bool VisitMemberExpr(const MemberExpr *E) {
11770     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11771       return false;
11772     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11773          llvm::reverse(LPM)) {
11774       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11775       if (It == D.DeclToUniqueName.end())
11776         continue;
11777       if (D.Disabled)
11778         return false;
11779       FoundE = E;
11780       FoundD = E->getMemberDecl()->getCanonicalDecl();
11781       UniqueDeclName = It->second;
11782       IVLVal = D.IVLVal;
11783       FoundFn = D.Fn;
11784       break;
11785     }
11786     return FoundE == E;
11787   }
VisitStmt(const Stmt * S)11788   bool VisitStmt(const Stmt *S) {
11789     for (const Stmt *Child : S->children()) {
11790       if (!Child)
11791         continue;
11792       if (const auto *E = dyn_cast<Expr>(Child))
11793         if (!E->isGLValue())
11794           continue;
11795       if (Visit(Child))
11796         return true;
11797     }
11798     return false;
11799   }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11800   explicit LastprivateConditionalRefChecker(
11801       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11802       : LPM(LPM) {}
11803   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11804   getFoundData() const {
11805     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11806   }
11807 };
11808 } // namespace
11809 
// Emits the conditional update of the global copy of a lastprivate
// conditional variable, equivalent to:
//   #pragma omp critical(<UniqueDeclName>)
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// where last_iv/last_a are module-internal globals keyed by UniqueDeclName.
// In SIMD-only mode the body is emitted without the critical region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11897 
// Checks whether \p LHS writes to a tracked lastprivate conditional variable
// and, if so, emits the corresponding update: the Fired-flag store when the
// write happens in a function other than the region's owner (inner parallel
// region), or the conditional global-copy update otherwise.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // The store is atomic/volatile: other threads of the outer region may
    // read the flag concurrently.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11941 
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)11942 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11943     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11944     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11945   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11946     return;
11947   auto Range = llvm::reverse(LastprivateConditionalStack);
11948   auto It = llvm::find_if(
11949       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11950   if (It == Range.end() || It->Fn != CGF.CurFn)
11951     return;
11952   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11953   assert(LPCI != LastprivateConditionalToTypes.end() &&
11954          "Lastprivates must be registered already.");
11955   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11956   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11957   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11958   for (const auto &Pair : It->DeclToUniqueName) {
11959     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11960     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11961       continue;
11962     auto I = LPCI->getSecond().find(Pair.first);
11963     assert(I != LPCI->getSecond().end() &&
11964            "Lastprivate must be rehistered already.");
11965     // bool Cmp = priv_a.Fired != 0;
11966     LValue BaseLVal = std::get<3>(I->getSecond());
11967     LValue FiredLVal =
11968         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11969     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11970     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11971     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11972     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11973     // if (Cmp) {
11974     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11975     CGF.EmitBlock(ThenBB);
11976     Address Addr = CGF.GetAddrOfLocalVar(VD);
11977     LValue LVal;
11978     if (VD->getType()->isReferenceType())
11979       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11980                                            AlignmentSource::Decl);
11981     else
11982       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11983                                 AlignmentSource::Decl);
11984     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11985                                      D.getBeginLoc());
11986     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11987     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11988     // }
11989   }
11990 }
11991 
// At the end of the region, copies the final value of a lastprivate
// conditional variable from its module-internal global back into the private
// copy \p PrivLVal. If the global was never created, the variable was never
// updated in the region and nothing is emitted.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12010 
// SIMD-only mode (-fopenmp-simd) never emits runtime calls; reaching any of
// these CGOpenMPSIMDRuntime overrides is a front-end logic error.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12017 
// Teams regions require the OpenMP runtime; unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12024 
// Task outlining requires the OpenMP runtime; unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12032 
// Parallel calls require the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12041 
// Critical regions require the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12048 
// Master regions require the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12054 
// Masked regions require the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12061 
// Taskyield requires the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12066 
// Taskgroup regions require the OpenMP runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12072 
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)12073 void CGOpenMPSIMDRuntime::emitSingleRegion(
12074     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12075     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12076     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12077     ArrayRef<const Expr *> AssignmentOps) {
12078   llvm_unreachable("Not supported in SIMD-only mode");
12079 }
12080 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)12081 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12082                                             const RegionCodeGenTy &OrderedOpGen,
12083                                             SourceLocation Loc,
12084                                             bool IsThreads) {
12085   llvm_unreachable("Not supported in SIMD-only mode");
12086 }
12087 
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)12088 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12089                                           SourceLocation Loc,
12090                                           OpenMPDirectiveKind Kind,
12091                                           bool EmitChecks,
12092                                           bool ForceSimpleCall) {
12093   llvm_unreachable("Not supported in SIMD-only mode");
12094 }
12095 
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)12096 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12097     CodeGenFunction &CGF, SourceLocation Loc,
12098     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12099     bool Ordered, const DispatchRTInput &DispatchValues) {
12100   llvm_unreachable("Not supported in SIMD-only mode");
12101 }
12102 
emitForDispatchDeinit(CodeGenFunction & CGF,SourceLocation Loc)12103 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12104                                                 SourceLocation Loc) {
12105   llvm_unreachable("Not supported in SIMD-only mode");
12106 }
12107 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)12108 void CGOpenMPSIMDRuntime::emitForStaticInit(
12109     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12110     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12111   llvm_unreachable("Not supported in SIMD-only mode");
12112 }
12113 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const StaticRTInput & Values)12114 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12115     CodeGenFunction &CGF, SourceLocation Loc,
12116     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12117   llvm_unreachable("Not supported in SIMD-only mode");
12118 }
12119 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)12120 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12121                                                      SourceLocation Loc,
12122                                                      unsigned IVSize,
12123                                                      bool IVSigned) {
12124   llvm_unreachable("Not supported in SIMD-only mode");
12125 }
12126 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)12127 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12128                                               SourceLocation Loc,
12129                                               OpenMPDirectiveKind DKind) {
12130   llvm_unreachable("Not supported in SIMD-only mode");
12131 }
12132 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)12133 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12134                                               SourceLocation Loc,
12135                                               unsigned IVSize, bool IVSigned,
12136                                               Address IL, Address LB,
12137                                               Address UB, Address ST) {
12138   llvm_unreachable("Not supported in SIMD-only mode");
12139 }
12140 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)12141 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12142                                                llvm::Value *NumThreads,
12143                                                SourceLocation Loc) {
12144   llvm_unreachable("Not supported in SIMD-only mode");
12145 }
12146 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)12147 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12148                                              ProcBindKind ProcBind,
12149                                              SourceLocation Loc) {
12150   llvm_unreachable("Not supported in SIMD-only mode");
12151 }
12152 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)12153 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12154                                                     const VarDecl *VD,
12155                                                     Address VDAddr,
12156                                                     SourceLocation Loc) {
12157   llvm_unreachable("Not supported in SIMD-only mode");
12158 }
12159 
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)12160 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12161     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12162     CodeGenFunction *CGF) {
12163   llvm_unreachable("Not supported in SIMD-only mode");
12164 }
12165 
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)12166 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12167     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12168   llvm_unreachable("Not supported in SIMD-only mode");
12169 }
12170 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * > Vars,SourceLocation Loc,llvm::AtomicOrdering AO)12171 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12172                                     ArrayRef<const Expr *> Vars,
12173                                     SourceLocation Loc,
12174                                     llvm::AtomicOrdering AO) {
12175   llvm_unreachable("Not supported in SIMD-only mode");
12176 }
12177 
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12178 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12179                                        const OMPExecutableDirective &D,
12180                                        llvm::Function *TaskFunction,
12181                                        QualType SharedsTy, Address Shareds,
12182                                        const Expr *IfCond,
12183                                        const OMPTaskDataTy &Data) {
12184   llvm_unreachable("Not supported in SIMD-only mode");
12185 }
12186 
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)12187 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12188     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12189     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12190     const Expr *IfCond, const OMPTaskDataTy &Data) {
12191   llvm_unreachable("Not supported in SIMD-only mode");
12192 }
12193 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)12194 void CGOpenMPSIMDRuntime::emitReduction(
12195     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12196     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12197     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12198   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12199   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12200                                  ReductionOps, Options);
12201 }
12202 
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)12203 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12204     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12205     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12206   llvm_unreachable("Not supported in SIMD-only mode");
12207 }
12208 
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)12209 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12210                                                 SourceLocation Loc,
12211                                                 bool IsWorksharingReduction) {
12212   llvm_unreachable("Not supported in SIMD-only mode");
12213 }
12214 
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)12215 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12216                                                   SourceLocation Loc,
12217                                                   ReductionCodeGen &RCG,
12218                                                   unsigned N) {
12219   llvm_unreachable("Not supported in SIMD-only mode");
12220 }
12221 
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)12222 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12223                                                   SourceLocation Loc,
12224                                                   llvm::Value *ReductionsPtr,
12225                                                   LValue SharedLVal) {
12226   llvm_unreachable("Not supported in SIMD-only mode");
12227 }
12228 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPTaskDataTy & Data)12229 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12230                                            SourceLocation Loc,
12231                                            const OMPTaskDataTy &Data) {
12232   llvm_unreachable("Not supported in SIMD-only mode");
12233 }
12234 
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)12235 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12236     CodeGenFunction &CGF, SourceLocation Loc,
12237     OpenMPDirectiveKind CancelRegion) {
12238   llvm_unreachable("Not supported in SIMD-only mode");
12239 }
12240 
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)12241 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12242                                          SourceLocation Loc, const Expr *IfCond,
12243                                          OpenMPDirectiveKind CancelRegion) {
12244   llvm_unreachable("Not supported in SIMD-only mode");
12245 }
12246 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)12247 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12248     const OMPExecutableDirective &D, StringRef ParentName,
12249     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12250     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12251   llvm_unreachable("Not supported in SIMD-only mode");
12252 }
12253 
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)12254 void CGOpenMPSIMDRuntime::emitTargetCall(
12255     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12256     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12257     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12258     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12259                                      const OMPLoopDirective &D)>
12260         SizeEmitter) {
12261   llvm_unreachable("Not supported in SIMD-only mode");
12262 }
12263 
emitTargetFunctions(GlobalDecl GD)12264 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12265   llvm_unreachable("Not supported in SIMD-only mode");
12266 }
12267 
emitTargetGlobalVariable(GlobalDecl GD)12268 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12269   llvm_unreachable("Not supported in SIMD-only mode");
12270 }
12271 
emitTargetGlobal(GlobalDecl GD)12272 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12273   return false;
12274 }
12275 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)12276 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12277                                         const OMPExecutableDirective &D,
12278                                         SourceLocation Loc,
12279                                         llvm::Function *OutlinedFn,
12280                                         ArrayRef<llvm::Value *> CapturedVars) {
12281   llvm_unreachable("Not supported in SIMD-only mode");
12282 }
12283 
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)12284 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12285                                              const Expr *NumTeams,
12286                                              const Expr *ThreadLimit,
12287                                              SourceLocation Loc) {
12288   llvm_unreachable("Not supported in SIMD-only mode");
12289 }
12290 
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen,CGOpenMPRuntime::TargetDataInfo & Info)12291 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12292     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12293     const Expr *Device, const RegionCodeGenTy &CodeGen,
12294     CGOpenMPRuntime::TargetDataInfo &Info) {
12295   llvm_unreachable("Not supported in SIMD-only mode");
12296 }
12297 
emitTargetDataStandAloneCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device)12298 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12299     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12300     const Expr *Device) {
12301   llvm_unreachable("Not supported in SIMD-only mode");
12302 }
12303 
emitDoacrossInit(CodeGenFunction & CGF,const OMPLoopDirective & D,ArrayRef<Expr * > NumIterations)12304 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12305                                            const OMPLoopDirective &D,
12306                                            ArrayRef<Expr *> NumIterations) {
12307   llvm_unreachable("Not supported in SIMD-only mode");
12308 }
12309 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)12310 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12311                                               const OMPDependClause *C) {
12312   llvm_unreachable("Not supported in SIMD-only mode");
12313 }
12314 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDoacrossClause * C)12315 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12316                                               const OMPDoacrossClause *C) {
12317   llvm_unreachable("Not supported in SIMD-only mode");
12318 }
12319 
12320 const VarDecl *
translateParameter(const FieldDecl * FD,const VarDecl * NativeParam) const12321 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12322                                         const VarDecl *NativeParam) const {
12323   llvm_unreachable("Not supported in SIMD-only mode");
12324 }
12325 
12326 Address
getParameterAddress(CodeGenFunction & CGF,const VarDecl * NativeParam,const VarDecl * TargetParam) const12327 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12328                                          const VarDecl *NativeParam,
12329                                          const VarDecl *TargetParam) const {
12330   llvm_unreachable("Not supported in SIMD-only mode");
12331 }
12332