xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 31ba4ce8898f9dfa5e7f054fdbc26e50a599a6e3)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that are backed by a captured statement \p CS
  /// (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without an associated captured statement
  /// (e.g. inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next switching point for an untied task; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns true if a 'cancel' directive may appear in this region.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
108 
/// API for captured statement code generation in OpenMP constructs
/// outlined as separate functions (e.g. 'parallel').
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null for outlined regions (asserted below).
  /// \param HelperName Name used for the generated capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated capture helper function.
  StringRef HelperName;
};
141 
/// API for captured statement code generation in OpenMP 'task'-like
/// constructs outlined as separate functions.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switching machinery required for
  /// untied tasks: an untied task may suspend at scheduling points, so its
  /// body is split into parts selected by a switch on the part id.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: constructed from !Tied).
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Extra codegen to run at each switching point (e.g. re-enqueue).
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; cases are appended as parts are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination returns from the task function; case 0 falls
        // through to the first part of the body.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id, run the untied
    /// codegen, return from the function, and register the continuation
    /// block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body has been split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) region info,
/// if there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this
  /// inlined region was entered; restored by InlinedOpenMPRegionRAII.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE: unlike the other delegating methods, this queries getOldCSI()
  /// directly, so it also works when the outer info is not an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Returns the captured-statement info active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, supplied by the caller.
  StringRef HelperName;
};
342 
/// Placeholder RegionCodeGenTy callback for regions that only wrap
/// expressions; it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
405 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on the CodeGenFunction for the lifetime of the
/// object and restores the previous state on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, swapped out when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda/block capture state is hidden from
  /// the inlined region and restored on destruction.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
448 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring the runtime's kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
477 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids recognized by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
503 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
544 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
576 
577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
578 /// region.
579 class CleanupTy final : public EHScopeStack::Cleanup {
580   PrePostActionTy *Action;
581 
582 public:
583   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
584   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
585     if (!CGF.HaveInsertPoint())
586       return;
587     Action->Exit(CGF);
588   }
589 };
590 
591 } // anonymous namespace
592 
593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
594   CodeGenFunction::RunCleanupsScope Scope(CGF);
595   if (PrePostAction) {
596     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
597     Callback(CodeGen, CGF, *PrePostAction);
598   } else {
599     PrePostActionTy Action;
600     Callback(CodeGen, CGF, Action);
601   }
602 }
603 
604 /// Check if the combiner is a call to UDR combiner and if it is so return the
605 /// UDR decl used for reduction.
606 static const OMPDeclareReductionDecl *
607 getReductionInit(const Expr *ReductionOp) {
608   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
609     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
610       if (const auto *DRE =
611               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
612         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
613           return DRD;
614   return nullptr;
615 }
616 
/// Emit initialization of \p Private from \p Original using either the UDR
/// initializer attached to \p DRD (if present) or a null constant of type
/// \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // UDR with an initializer clause: emit a call to the generated
    // initializer function, binding 'omp_priv' to Private and 'omp_orig'
    // to Original via privatization.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Map the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a private global holding the null
    // constant of Ty and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
668 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element with the
/// UDR initializer \p Init; otherwise emit \p Init as a plain initializer.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl, or null when no UDR is involved.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Source element pointer is only tracked when a UDR needs the original
  // value ('omp_orig').
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
757 
758 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
759   return CGF.EmitOMPSharedLValue(E);
760 }
761 
762 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
763                                             const Expr *E) {
764   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
765     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
766   return LValue();
767 }
768 
/// Emit initialization of the N-th private aggregate (array) reduction copy,
/// either via the user-defined reduction initializer or via the private
/// variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one exists, or when the
  // private copy has no initializer of its own to fall back on.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
785 
786 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
787                                    ArrayRef<const Expr *> Origs,
788                                    ArrayRef<const Expr *> Privates,
789                                    ArrayRef<const Expr *> ReductionOps) {
790   ClausesData.reserve(Shareds.size());
791   SharedAddresses.reserve(Shareds.size());
792   Sizes.reserve(Shareds.size());
793   BaseDecls.reserve(Shareds.size());
794   const auto *IOrig = Origs.begin();
795   const auto *IPriv = Privates.begin();
796   const auto *IRed = ReductionOps.begin();
797   for (const Expr *Ref : Shareds) {
798     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
799     std::advance(IOrig, 1);
800     std::advance(IPriv, 1);
801     std::advance(IRed, 1);
802   }
803 }
804 
805 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
806   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
807          "Number of generated lvalues must be exactly N.");
808   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
809   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
810   SharedAddresses.emplace_back(First, Second);
811   if (ClausesData[N].Shared == ClausesData[N].Ref) {
812     OrigAddresses.emplace_back(First, Second);
813   } else {
814     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
815     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
816     OrigAddresses.emplace_back(First, Second);
817   }
818 }
819 
/// Compute and record the size of the N-th reduction item: (size-in-chars,
/// size-in-elements). For non-variably-modified types only the char size is
/// needed; for VLAs/array sections the element count is computed at runtime
/// and the variably-modified private type is materialized with it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: char size is a (constant) type size, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: bytes come from the type size; elements = bytes / sizeof(element).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably-modified type is emitted (OpaqueValueMapping is scoped RAII).
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
856 
/// Re-materialize the variably-modified type of the N-th reduction item using
/// a previously computed element count \p Size (e.g. in a different function
/// than the one that originally computed it). No-op for fixed-size items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to Size for the duration of type emission.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
875 
/// Emit initialization of the N-th private reduction copy. Chooses between
/// (1) aggregate/array element-wise initialization, (2) a user-declared
/// reduction initializer, or (3) the private variable's own initializer,
/// falling back to \p DefaultInit when it reports the init as handled.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Adjust both addresses to the memory representation of their declared
  // types before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays: per-element initialization (declare-reduction init runs first
    // through DefaultInit when present).
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit did not handle it; emit the variable's own initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
909 
910 bool ReductionCodeGen::needCleanups(unsigned N) {
911   const auto *PrivateVD =
912       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913   QualType PrivateType = PrivateVD->getType();
914   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915   return DTorKind != QualType::DK_none;
916 }
917 
918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919                                     Address PrivateAddr) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   if (needCleanups(N)) {
925     PrivateAddr = CGF.Builder.CreateElementBitCast(
926         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928   }
929 }
930 
/// Dereference pointer/reference indirections of \p BaseLV (declared type
/// \p BaseTy) until the pointee type matches \p ElTy, loading through each
/// level; finally bitcast the resulting address to ElTy's memory type while
/// preserving the lvalue's base and TBAA information.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference: load it as a reference lvalue at the current address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
950 
/// Inverse of loadToBegin: rebuild the pointer indirection chain of \p BaseTy
/// around the raw address \p Addr. For each pointer/reference level a stack
/// temporary is created, each inner temporary's address is stored into the
/// one above it, and \p Addr (cast to the innermost type) is stored at the
/// bottom. Returns the outermost temporary, or \p Addr itself when no
/// indirection is needed.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // remember the outermost temporary to return
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection levels: hand back the (cast) address directly.
  return Address(Addr, BaseLVAlignment);
}
978 
979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980   const VarDecl *OrigVD = nullptr;
981   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984       Base = TempOASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992       Base = TempASE->getBase()->IgnoreParenImpCasts();
993     DE = cast<DeclRefExpr>(Base);
994     OrigVD = cast<VarDecl>(DE->getDecl());
995   }
996   return OrigVD;
997 }
998 
/// For array-section/subscript reduction items, shift the private address so
/// it lines up with the *base* of the original variable rather than the
/// section's first element: compute the element offset of the shared lvalue
/// from the original base and apply the same (negative) offset to the private
/// copy, then rebuild the base's indirection chain around it. For plain
/// references the private address is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the original base from the section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1024 
1025 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1026   const OMPDeclareReductionDecl *DRD =
1027       getReductionInit(ClausesData[N].ReductionOp);
1028   return DRD && DRD->getInitializer();
1029 }
1030 
1031 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1032   return CGF.EmitLoadOfPointerLValue(
1033       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1034       getThreadIDVariable()->getType()->castAs<PointerType>());
1035 }
1036 
/// Emit the body of an OpenMP region inside a terminate scope so that any
/// exception escaping the structured block terminates the program.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1049 
1050 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1051     CodeGenFunction &CGF) {
1052   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1053                             getThreadIDVariable()->getType(),
1054                             AlignmentSource::Decl);
1055 }
1056 
1057 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1058                                        QualType FieldTy) {
1059   auto *Field = FieldDecl::Create(
1060       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1061       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1062       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1063   Field->setAccess(AS_public);
1064   DC->addDecl(Field);
1065   return Field;
1066 }
1067 
/// Construct the OpenMP runtime interface: set up the critical-name type
/// (kmp_critical_name is an array of 8 i32), initialize the OpenMPIRBuilder's
/// type cache, and load any offloading metadata from the host IR.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1078 
1079 void CGOpenMPRuntime::clear() {
1080   InternalVars.clear();
1081   // Clean non-target variable declarations possibly used only in debug info.
1082   for (const auto &Data : EmittedNonTargetVariables) {
1083     if (!Data.getValue().pointsToAliveValue())
1084       continue;
1085     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1086     if (!GV)
1087       continue;
1088     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1089       continue;
1090     GV->eraseFromParent();
1091   }
1092 }
1093 
1094 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1095   SmallString<128> Buffer;
1096   llvm::raw_svector_ostream OS(Buffer);
1097   StringRef Sep = FirstSeparator;
1098   for (StringRef Part : Parts) {
1099     OS << Sep << Part;
1100     Sep = Separator;
1101   }
1102   return std::string(OS.str());
1103 }
1104 
1105 static llvm::Function *
1106 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1107                           const Expr *CombinerInitializer, const VarDecl *In,
1108                           const VarDecl *Out, bool IsCombiner) {
1109   // void .omp_combiner.(Ty *in, Ty *out);
1110   ASTContext &C = CGM.getContext();
1111   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1112   FunctionArgList Args;
1113   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1114                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1115   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1116                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1117   Args.push_back(&OmpOutParm);
1118   Args.push_back(&OmpInParm);
1119   const CGFunctionInfo &FnInfo =
1120       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1121   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1122   std::string Name = CGM.getOpenMPRuntime().getName(
1123       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1124   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1125                                     Name, &CGM.getModule());
1126   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1127   if (CGM.getLangOpts().Optimize) {
1128     Fn->removeFnAttr(llvm::Attribute::NoInline);
1129     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1130     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1131   }
1132   CodeGenFunction CGF(CGM);
1133   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1134   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1135   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1136                     Out->getLocation());
1137   CodeGenFunction::OMPPrivateScope Scope(CGF);
1138   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1139   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1140     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1141         .getAddress(CGF);
1142   });
1143   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1144   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1145     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1146         .getAddress(CGF);
1147   });
1148   (void)Scope.Privatize();
1149   if (!IsCombiner && Out->hasInit() &&
1150       !CGF.isTrivialInitializer(Out->getInit())) {
1151     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1152                          Out->getType().getQualifiers(),
1153                          /*IsInitializer=*/true);
1154   }
1155   if (CombinerInitializer)
1156     CGF.EmitIgnoredExpr(CombinerInitializer);
1157   Scope.ForceCleanup();
1158   CGF.FinishFunction();
1159   return Fn;
1160 }
1161 
/// Emit (once) the combiner and optional initializer functions for a
/// `#pragma omp declare reduction` and cache them in UDRMap; when emitted
/// inside a function, also record the decl against that function so it can
/// be cleaned up with it.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only CallInit initializers pass the init expression through; direct
    // init is handled via the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1187 
1188 std::pair<llvm::Function *, llvm::Function *>
1189 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1190   auto I = UDRMap.find(D);
1191   if (I != UDRMap.end())
1192     return I->second;
1193   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1194   return UDRMap.lookup(D);
1195 }
1196 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// On construction, pushes a finalization callback describing the enclosing
// parallel region (and whether it has cancellation) onto the OpenMPIRBuilder's
// finalization stack; on destruction, pops it. No-op when no builder is used.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Restore Clang's builder to the OpenMPIRBuilder's insertion point and
      // branch to the cancellation destination through any active cleanups.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Non-owning; null when the OpenMPIRBuilder is not in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1242 
/// Outline the body of a parallel or teams region into a separate function.
/// Determines whether the directive (in any of its combined forms) may
/// contain a `cancel parallel` so cancellation barriers are set up correctly.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // hasCancel() lives on each concrete directive class, so every directive
  // kind that can carry a parallel cancel must be checked individually.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1279 
1280 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1281     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1282     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1283   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1284   return emitParallelOrTeamsOutlinedFunction(
1285       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1286 }
1287 
1288 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1289     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1291   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1292   return emitParallelOrTeamsOutlinedFunction(
1293       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1294 }
1295 
/// Outline the body of a task (or taskloop) directive. For untied tasks the
/// generated parts re-schedule themselves via __kmpc_omp_task, and the number
/// of generated parts is reported back through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen emitted at each untied switch point: re-enqueue the task with
  // __kmpc_omp_task so the remaining parts run later.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel() lives on each concrete directive class, so each task-like
  // directive kind is checked individually.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1342 
1343 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1344                              const RecordDecl *RD, const CGRecordLayout &RL,
1345                              ArrayRef<llvm::Constant *> Data) {
1346   llvm::StructType *StructTy = RL.getLLVMType();
1347   unsigned PrevIdx = 0;
1348   ConstantInitBuilder CIBuilder(CGM);
1349   auto DI = Data.begin();
1350   for (const FieldDecl *FD : RD->fields()) {
1351     unsigned Idx = RL.getLLVMFieldNo(FD);
1352     // Fill the alignment.
1353     for (unsigned I = PrevIdx; I < Idx; ++I)
1354       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1355     PrevIdx = Idx + 1;
1356     Fields.add(*DI);
1357     ++DI;
1358   }
1359 }
1360 
/// Create a global variable holding a constant struct of type \p Ty whose
/// fields are initialized from \p Data; extra arguments are forwarded to
/// ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1375 
/// Build a constant struct of type \p Ty from \p Data and append it to an
/// in-progress aggregate \p Parent (e.g. a surrounding struct or array
/// builder), rather than creating a standalone global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1387 
/// Create the per-function service insertion point used for runtime-support
/// instructions (e.g. thread-id and ident loads). The marker is a dummy
/// i32->i32 bitcast of undef, placed either at the current builder position
/// or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker to the block the builder is currently emitting into.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the marker in the entry block, just after the allocas.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1403 
1404 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1405   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1406   if (Elem.second.ServiceInsertPt) {
1407     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1408     Elem.second.ServiceInsertPt = nullptr;
1409     Ptr->eraseFromParent();
1410   }
1411 }
1412 
/// Render the ident_t-style location string ";file;function;line;col;;" for
/// \p Loc into the caller-provided \p Buffer. The returned StringRef points
/// into \p Buffer, so it is only valid while the buffer is alive.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // Function name is only available when emitting inside a FunctionDecl.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1425 
1426 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1427                                                  SourceLocation Loc,
1428                                                  unsigned Flags) {
1429   llvm::Constant *SrcLocStr;
1430   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1431       Loc.isInvalid()) {
1432     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1433   } else {
1434     std::string FunctionName = "";
1435     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1436       FunctionName = FD->getQualifiedNameAsString();
1437     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1438     const char *FileName = PLoc.getFilename();
1439     unsigned Line = PLoc.getLine();
1440     unsigned Column = PLoc.getColumn();
1441     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1442                                                 Line, Column);
1443   }
1444   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1445   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1446                                      Reserved2Flags);
1447 }
1448 
/// Returns the OpenMP global thread id at the current insertion point,
/// reusing a value already cached for this function when available.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the thread-id parameter is only done when either C++
      // exceptions cannot require a landing pad here, or the load (and the
      // address it loads from) is in the entry block / current block, so the
      // emitted load is positioned safely relative to its pointer operand.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point (see setLocThreadIdInsertPt)
  // so the cached value is available throughout the function; the guard
  // restores the builder's position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1516 
1517 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1518   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1519   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1520     clearLocThreadIdInsertPt(CGF);
1521     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1522   }
1523   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1524     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1525       UDRMap.erase(D);
1526     FunctionUDRMap.erase(CGF.CurFn);
1527   }
1528   auto I = FunctionUDMMap.find(CGF.CurFn);
1529   if (I != FunctionUDMMap.end()) {
1530     for(const auto *D : I->second)
1531       UDMMap.erase(D);
1532     FunctionUDMMap.erase(I);
1533   }
1534   LastprivateConditionalToTypes.erase(CGF.CurFn);
1535   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1536 }
1537 
/// Returns the ident_t* type used by OpenMP runtime entry points, as cached
/// by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1541 
/// Returns the pointer type of the kmpc_micro outlined-function signature,
/// building and caching the function type (in Kmpc_MicroTy) on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1551 
1552 llvm::FunctionCallee
1553 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1554   assert((IVSize == 32 || IVSize == 64) &&
1555          "IV size is not compatible with the omp runtime");
1556   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1557                                             : "__kmpc_for_static_init_4u")
1558                                 : (IVSigned ? "__kmpc_for_static_init_8"
1559                                             : "__kmpc_for_static_init_8u");
1560   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562   llvm::Type *TypeParams[] = {
1563     getIdentTyPointerTy(),                     // loc
1564     CGM.Int32Ty,                               // tid
1565     CGM.Int32Ty,                               // schedtype
1566     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567     PtrTy,                                     // p_lower
1568     PtrTy,                                     // p_upper
1569     PtrTy,                                     // p_stride
1570     ITy,                                       // incr
1571     ITy                                        // chunk
1572   };
1573   auto *FnTy =
1574       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575   return CGM.CreateRuntimeFunction(FnTy, Name);
1576 }
1577 
1578 llvm::FunctionCallee
1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580   assert((IVSize == 32 || IVSize == 64) &&
1581          "IV size is not compatible with the omp runtime");
1582   StringRef Name =
1583       IVSize == 32
1584           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588                                CGM.Int32Ty,           // tid
1589                                CGM.Int32Ty,           // schedtype
1590                                ITy,                   // lower
1591                                ITy,                   // upper
1592                                ITy,                   // stride
1593                                ITy                    // chunk
1594   };
1595   auto *FnTy =
1596       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597   return CGM.CreateRuntimeFunction(FnTy, Name);
1598 }
1599 
1600 llvm::FunctionCallee
1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602   assert((IVSize == 32 || IVSize == 64) &&
1603          "IV size is not compatible with the omp runtime");
1604   StringRef Name =
1605       IVSize == 32
1606           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608   llvm::Type *TypeParams[] = {
1609       getIdentTyPointerTy(), // loc
1610       CGM.Int32Ty,           // tid
1611   };
1612   auto *FnTy =
1613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614   return CGM.CreateRuntimeFunction(FnTy, Name);
1615 }
1616 
1617 llvm::FunctionCallee
1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619   assert((IVSize == 32 || IVSize == 64) &&
1620          "IV size is not compatible with the omp runtime");
1621   StringRef Name =
1622       IVSize == 32
1623           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627   llvm::Type *TypeParams[] = {
1628     getIdentTyPointerTy(),                     // loc
1629     CGM.Int32Ty,                               // tid
1630     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631     PtrTy,                                     // p_lower
1632     PtrTy,                                     // p_upper
1633     PtrTy                                      // p_stride
1634   };
1635   auto *FnTy =
1636       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637   return CGM.CreateRuntimeFunction(FnTy, Name);
1638 }
1639 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Device and file IDs come from the filesystem identity of the source file.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  // NOTE: on failure a diagnostic is emitted but execution continues with the
  // default-constructed UniqueID.

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1665 
/// For a declare-target variable that must be referenced indirectly — a
/// 'link' clause, or a 'to' clause under requires unified_shared_memory —
/// returns the address of its "_decl_tgt_ref_ptr" indirection pointer,
/// creating and registering that pointer on first use. Returns an invalid
/// Address in all other cases (including -fopenmp-simd mode).
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr";
    // the file ID disambiguates internal-linkage variables across TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use in this module: create the global, give it weak linkage so
      // duplicates across TUs merge, and register it for offloading.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host initializes the pointer with the variable's address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1704 
1705 llvm::Constant *
1706 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1707   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1708          !CGM.getContext().getTargetInfo().isTLSSupported());
1709   // Lookup the entry, lazily creating it if necessary.
1710   std::string Suffix = getName({"cache", ""});
1711   return getOrCreateInternalVariable(
1712       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1713 }
1714 
/// Returns the address of the current thread's copy of threadprivate
/// variable \p VD. When native TLS is usable the original address is already
/// thread-local; otherwise a __kmpc_threadprivate_cached call is emitted.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Pass the location, thread id, the original variable's address, its store
  // size, and the per-variable cache slot to the runtime.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
1735 
/// Emits the runtime calls that register the constructor, copy-constructor
/// and destructor of the threadprivate variable at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
1755 
/// Emits the machinery for a threadprivate variable definition: a ctor
/// function that re-emits the initializer into the thread's copy, a dtor
/// function that destroys it, and the __kmpc_threadprivate_register call
/// wiring them to the runtime. When no \p CGF is provided, the registration
/// is wrapped in a standalone init function which is returned to the caller;
/// otherwise (and on the TLS fast path, or when neither ctor nor dtor is
/// needed) returns nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the variable itself is thread-local; no runtime
  // registration is emitted here.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the address of the thread's copy as a single void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Re-load the argument and return it: the ctor hands the copy's address
      // back to the runtime.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also receives the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor become typed null pointers in the registration call.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: wrap the registration in a standalone global-init
      // function and return it for the caller to emit.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1875 
/// Emits the offloading ctor/dtor entries for a declare-target variable that
/// needs dynamic initialization or destruction. On the device, real
/// ctor/dtor functions are generated; on the host, only dummy private
/// globals serving as unique entry IDs are created. Returns
/// CGM.getLangOpts().OpenMPIsDevice for all handled cases, and false when no
/// offloading is configured at all.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Link-clause variables (and to-clause under unified shared memory) are
  // accessed through the reference pointer instead; nothing to emit here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit at most once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a dummy private global acts only as the unique ID for the
      // offload entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy private global as the unique entry ID (see ctor).
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1990 
/// Returns the address of a compiler-generated ("artificial") threadprivate
/// variable named \p Name of type \p VarType, creating the backing global on
/// first use.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: mark the global thread_local when the target supports TLS.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a dedicated cache
  // slot, as for user-declared threadprivate variables.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's void* result back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2021 
2022 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2023                                    const RegionCodeGenTy &ThenGen,
2024                                    const RegionCodeGenTy &ElseGen) {
2025   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2026 
2027   // If the condition constant folds and can be elided, try to avoid emitting
2028   // the condition and the dead arm of the if/else.
2029   bool CondConstant;
2030   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2031     if (CondConstant)
2032       ThenGen(CGF);
2033     else
2034       ElseGen(CGF);
2035     return;
2036   }
2037 
2038   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2039   // emit the conditional branch.
2040   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2041   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2042   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2043   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2044 
2045   // Emit the 'then' code.
2046   CGF.EmitBlock(ThenBlock);
2047   ThenGen(CGF);
2048   CGF.EmitBranch(ContBlock);
2049   // Emit the 'else' code if present.
2050   // There is no need to emit line number for unconditional branch.
2051   (void)ApplyDebugLocation::CreateEmpty(CGF);
2052   CGF.EmitBlock(ElseBlock);
2053   ElseGen(CGF);
2054   // There is no need to emit line number for unconditional branch.
2055   (void)ApplyDebugLocation::CreateEmpty(CGF);
2056   CGF.EmitBranch(ContBlock);
2057   // Emit the continuation block for code after the if.
2058   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2059 }
2060 
// Emit code for an OpenMP 'parallel' region: a __kmpc_fork_call that launches
// OutlinedFn on the team, or — when the 'if' clause condition is false at
// runtime — a serialized execution of OutlinedFn on the current thread,
// bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    //       the handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // 'if' clause present: choose fork vs. serialized execution at runtime.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2130 
2131 // If we're inside an (outlined) parallel region, use the region info's
2132 // thread-ID variable (it is passed in a first argument of the outlined function
2133 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2134 // regular serial code region, get thread ID by calling kmp_int32
2135 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2136 // return the address of that temp.
2137 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2138                                              SourceLocation Loc) {
2139   if (auto *OMPRegionInfo =
2140           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2141     if (OMPRegionInfo->getThreadIDVariable())
2142       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2143 
2144   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2145   QualType Int32Ty =
2146       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2147   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2148   CGF.EmitStoreOfScalar(ThreadID,
2149                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2150 
2151   return ThreadIDTemp;
2152 }
2153 
2154 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2155     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2156   SmallString<256> Buffer;
2157   llvm::raw_svector_ostream Out(Buffer);
2158   Out << Name;
2159   StringRef RuntimeName = Out.str();
2160   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2161   if (Elem.second) {
2162     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2163            "OMP internal variable has different type than requested");
2164     return &*Elem.second;
2165   }
2166 
2167   return Elem.second = new llvm::GlobalVariable(
2168              CGM.getModule(), Ty, /*IsConstant*/ false,
2169              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2170              Elem.first(), /*InsertBefore=*/nullptr,
2171              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2172 }
2173 
2174 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2175   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2176   std::string Name = getName({Prefix, "var"});
2177   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2178 }
2179 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits EnterCallee(EnterArgs) before the region body and
/// ExitCallee(ExitArgs) after it. When \p Conditional is true, the enter
/// call's result gates the body: a non-zero return branches into the body
/// ("omp_if.then"); otherwise control skips to the continuation block
/// ("omp_if.end").
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only set by Enter() when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional region opened by Enter(). Callers must invoke this
  // (after the region body) iff the action was constructed with
  // Conditional == true; otherwise ContBlock is still null.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2218 
2219 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2220                                          StringRef CriticalName,
2221                                          const RegionCodeGenTy &CriticalOpGen,
2222                                          SourceLocation Loc, const Expr *Hint) {
2223   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2224   // CriticalOpGen();
2225   // __kmpc_end_critical(ident_t *, gtid, Lock);
2226   // Prepare arguments and build a call to __kmpc_critical
2227   if (!CGF.HaveInsertPoint())
2228     return;
2229   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2230                          getCriticalRegionLock(CriticalName)};
2231   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2232                                                 std::end(Args));
2233   if (Hint) {
2234     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2235         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2236   }
2237   CommonActionTy Action(
2238       OMPBuilder.getOrCreateRuntimeFunction(
2239           CGM.getModule(),
2240           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2241       EnterArgs,
2242       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2243                                             OMPRTL___kmpc_end_critical),
2244       Args);
2245   CriticalOpGen.setAction(Action);
2246   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2247 }
2248 
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emits:
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body and the end call only run on the thread for
  // which __kmpc_master returns non-zero.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region ("omp_if.end").
  Action.Done(CGF);
}
2271 
2272 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2273                                         SourceLocation Loc) {
2274   if (!CGF.HaveInsertPoint())
2275     return;
2276   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2277     OMPBuilder.createTaskyield(CGF.Builder);
2278   } else {
2279     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2280     llvm::Value *Args[] = {
2281         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2282         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2283     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2284                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2285                         Args);
2286   }
2287 
2288   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2289     Region->emitUntiedSwitch(CGF);
2290 }
2291 
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emits:
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Unconditional action: the enter/exit calls always bracket the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2311 
2312 /// Given an array of pointers to variables, project the address of a
2313 /// given variable.
2314 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2315                                       unsigned Index, const VarDecl *Var) {
2316   // Pull out the pointer to the variable.
2317   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2318   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2319 
2320   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2321   Addr = CGF.Builder.CreateElementBitCast(
2322       Addr, CGF.ConvertTypeForMem(Var->getType()));
2323   return Addr;
2324 }
2325 
// Synthesize the helper the __kmpc_copyprivate runtime call invokes:
//   void copy_func(void *LHSArg, void *RHSArg);
// Both arguments are really pointers to void*[n] arrays (type ArgsType), one
// slot per copyprivate variable; the helper performs the per-variable
// assignments described by AssignmentOps. Returns the new function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // Internal linkage: the helper is only referenced from this module's
  // __kmpc_copyprivate call.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // EmitOMPCopy performs the (possibly user-defined) assignment for this
    // variable's type via AssignmentOps[I].
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2379 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Emits:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Flag telling __kmpc_copyprivate which thread executed the region.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: only the thread that wins __kmpc_single runs the
  // body and the __kmpc_end_single call.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the action.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2467 
2468 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2469                                         const RegionCodeGenTy &OrderedOpGen,
2470                                         SourceLocation Loc, bool IsThreads) {
2471   if (!CGF.HaveInsertPoint())
2472     return;
2473   // __kmpc_ordered(ident_t *, gtid);
2474   // OrderedOpGen();
2475   // __kmpc_end_ordered(ident_t *, gtid);
2476   // Prepare arguments and build a call to __kmpc_ordered
2477   if (IsThreads) {
2478     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2479     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2480                               CGM.getModule(), OMPRTL___kmpc_ordered),
2481                           Args,
2482                           OMPBuilder.getOrCreateRuntimeFunction(
2483                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2484                           Args);
2485     OrderedOpGen.setAction(Action);
2486     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2487     return;
2488   }
2489   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2490 }
2491 
2492 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2493   unsigned Flags;
2494   if (Kind == OMPD_for)
2495     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2496   else if (Kind == OMPD_sections)
2497     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2498   else if (Kind == OMPD_single)
2499     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2500   else if (Kind == OMPD_barrier)
2501     Flags = OMP_IDENT_BARRIER_EXPL;
2502   else
2503     Flags = OMP_IDENT_BARRIER_IMPL;
2504   return Flags;
2505 }
2506 
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive
  // (an 'ordered' clause with a loop count). In this case choose static, 1
  // schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit integer literal '1' as the chunk expr.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
  // Otherwise ScheduleKind/ChunkExpr are left untouched (caller's defaults).
}
2524 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags encode which construct this barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // In a cancellable region use the cancellation-aware barrier so that
    // cancelled teams can exit through it.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2574 
2575 /// Map the OpenMP loop schedule to the runtime enumeration.
2576 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2577                                           bool Chunked, bool Ordered) {
2578   switch (ScheduleKind) {
2579   case OMPC_SCHEDULE_static:
2580     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2581                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2582   case OMPC_SCHEDULE_dynamic:
2583     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2584   case OMPC_SCHEDULE_guided:
2585     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2586   case OMPC_SCHEDULE_runtime:
2587     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2588   case OMPC_SCHEDULE_auto:
2589     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2590   case OMPC_SCHEDULE_unknown:
2591     assert(!Chunked && "chunk was specified but schedule kind not known");
2592     return Ordered ? OMP_ord_static : OMP_sch_static;
2593   }
2594   llvm_unreachable("Unexpected runtime schedule");
2595 }
2596 
2597 /// Map the OpenMP distribute schedule to the runtime enumeration.
2598 static OpenMPSchedType
2599 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2600   // only static is allowed for dist_schedule
2601   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2602 }
2603 
2604 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2605                                          bool Chunked) const {
2606   OpenMPSchedType Schedule =
2607       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2608   return Schedule == OMP_sch_static;
2609 }
2610 
2611 bool CGOpenMPRuntime::isStaticNonchunked(
2612     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2613   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2614   return Schedule == OMP_dist_sch_static;
2615 }
2616 
2617 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2618                                       bool Chunked) const {
2619   OpenMPSchedType Schedule =
2620       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2621   return Schedule == OMP_sch_static_chunked;
2622 }
2623 
2624 bool CGOpenMPRuntime::isStaticChunked(
2625     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2626   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2627   return Schedule == OMP_dist_sch_static_chunked;
2628 }
2629 
2630 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2631   OpenMPSchedType Schedule =
2632       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2633   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2634   return Schedule != OMP_sch_static;
2635 }
2636 
2637 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2638                                   OpenMPScheduleClauseModifier M1,
2639                                   OpenMPScheduleClauseModifier M2) {
2640   int Modifier = 0;
2641   switch (M1) {
2642   case OMPC_SCHEDULE_MODIFIER_monotonic:
2643     Modifier = OMP_sch_modifier_monotonic;
2644     break;
2645   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2646     Modifier = OMP_sch_modifier_nonmonotonic;
2647     break;
2648   case OMPC_SCHEDULE_MODIFIER_simd:
2649     if (Schedule == OMP_sch_static_chunked)
2650       Schedule = OMP_sch_static_balanced_chunked;
2651     break;
2652   case OMPC_SCHEDULE_MODIFIER_last:
2653   case OMPC_SCHEDULE_MODIFIER_unknown:
2654     break;
2655   }
2656   switch (M2) {
2657   case OMPC_SCHEDULE_MODIFIER_monotonic:
2658     Modifier = OMP_sch_modifier_monotonic;
2659     break;
2660   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2661     Modifier = OMP_sch_modifier_nonmonotonic;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_simd:
2664     if (Schedule == OMP_sch_static_chunked)
2665       Schedule = OMP_sch_static_balanced_chunked;
2666     break;
2667   case OMPC_SCHEDULE_MODIFIER_last:
2668   case OMPC_SCHEDULE_MODIFIER_unknown:
2669     break;
2670   }
2671   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2672   // If the static schedule kind is specified or if the ordered clause is
2673   // specified, and if the nonmonotonic modifier is not specified, the effect is
2674   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2675   // modifier is specified, the effect is as if the nonmonotonic modifier is
2676   // specified.
2677   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2678     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2679           Schedule == OMP_sch_static_balanced_chunked ||
2680           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2681           Schedule == OMP_dist_sch_static_chunked ||
2682           Schedule == OMP_dist_sch_static))
2683       Modifier = OMP_sch_modifier_nonmonotonic;
2684   }
2685   return Schedule | Modifier;
2686 }
2687 
// Emit the dispatch-init runtime call that sets up a dynamically scheduled
// worksharing loop. IVSize/IVSigned select the matching runtime entry point
// (see createDispatchInitFunction); DispatchValues carries the loop bounds
// and the optional chunk expression.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through the static-init path instead, unless
  // 'ordered' forces dynamic dispatch.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2720 
2721 static void emitForStaticInitCall(
2722     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2723     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2724     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2725     const CGOpenMPRuntime::StaticRTInput &Values) {
2726   if (!CGF.HaveInsertPoint())
2727     return;
2728 
2729   assert(!Values.Ordered);
2730   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2731          Schedule == OMP_sch_static_balanced_chunked ||
2732          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2733          Schedule == OMP_dist_sch_static ||
2734          Schedule == OMP_dist_sch_static_chunked);
2735 
2736   // Call __kmpc_for_static_init(
2737   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2738   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2739   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2740   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2741   llvm::Value *Chunk = Values.Chunk;
2742   if (Chunk == nullptr) {
2743     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2744             Schedule == OMP_dist_sch_static) &&
2745            "expected static non-chunked schedule");
2746     // If the Chunk was not specified in the clause - use default value 1.
2747     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2748   } else {
2749     assert((Schedule == OMP_sch_static_chunked ||
2750             Schedule == OMP_sch_static_balanced_chunked ||
2751             Schedule == OMP_ord_static_chunked ||
2752             Schedule == OMP_dist_sch_static_chunked) &&
2753            "expected static chunked schedule");
2754   }
2755   llvm::Value *Args[] = {
2756       UpdateLocation,
2757       ThreadId,
2758       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2759                                                   M2)), // Schedule type
2760       Values.IL.getPointer(),                           // &isLastIter
2761       Values.LB.getPointer(),                           // &LB
2762       Values.UB.getPointer(),                           // &UB
2763       Values.ST.getPointer(),                           // &Stride
2764       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2765       Chunk                                             // Chunk
2766   };
2767   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2768 }
2769 
2770 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2771                                         SourceLocation Loc,
2772                                         OpenMPDirectiveKind DKind,
2773                                         const OpenMPScheduleTy &ScheduleKind,
2774                                         const StaticRTInput &Values) {
2775   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2776       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2777   assert(isOpenMPWorksharingDirective(DKind) &&
2778          "Expected loop-based or sections-based directive.");
2779   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2780                                              isOpenMPLoopDirective(DKind)
2781                                                  ? OMP_IDENT_WORK_LOOP
2782                                                  : OMP_IDENT_WORK_SECTIONS);
2783   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2784   llvm::FunctionCallee StaticInitFunction =
2785       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2786   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2787   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2788                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2789 }
2790 
2791 void CGOpenMPRuntime::emitDistributeStaticInit(
2792     CodeGenFunction &CGF, SourceLocation Loc,
2793     OpenMPDistScheduleClauseKind SchedKind,
2794     const CGOpenMPRuntime::StaticRTInput &Values) {
2795   OpenMPSchedType ScheduleNum =
2796       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2797   llvm::Value *UpdatedLocation =
2798       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2799   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2800   llvm::FunctionCallee StaticInitFunction =
2801       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2802   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2803                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2804                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2805 }
2806 
2807 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2808                                           SourceLocation Loc,
2809                                           OpenMPDirectiveKind DKind) {
2810   if (!CGF.HaveInsertPoint())
2811     return;
2812   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2813   llvm::Value *Args[] = {
2814       emitUpdateLocation(CGF, Loc,
2815                          isOpenMPDistributeDirective(DKind)
2816                              ? OMP_IDENT_WORK_DISTRIBUTE
2817                              : isOpenMPLoopDirective(DKind)
2818                                    ? OMP_IDENT_WORK_LOOP
2819                                    : OMP_IDENT_WORK_SECTIONS),
2820       getThreadID(CGF, Loc)};
2821   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2822   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2823                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2824                       Args);
2825 }
2826 
2827 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2828                                                  SourceLocation Loc,
2829                                                  unsigned IVSize,
2830                                                  bool IVSigned) {
2831   if (!CGF.HaveInsertPoint())
2832     return;
2833   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2834   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2835   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2836 }
2837 
2838 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2839                                           SourceLocation Loc, unsigned IVSize,
2840                                           bool IVSigned, Address IL,
2841                                           Address LB, Address UB,
2842                                           Address ST) {
2843   // Call __kmpc_dispatch_next(
2844   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2845   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2846   //          kmp_int[32|64] *p_stride);
2847   llvm::Value *Args[] = {
2848       emitUpdateLocation(CGF, Loc),
2849       getThreadID(CGF, Loc),
2850       IL.getPointer(), // &isLastIter
2851       LB.getPointer(), // &Lower
2852       UB.getPointer(), // &Upper
2853       ST.getPointer()  // &Stride
2854   };
2855   llvm::Value *Call =
2856       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2857   return CGF.EmitScalarConversion(
2858       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2859       CGF.getContext().BoolTy, Loc);
2860 }
2861 
2862 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2863                                            llvm::Value *NumThreads,
2864                                            SourceLocation Loc) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2870       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2871   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2872                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2873                       Args);
2874 }
2875 
2876 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2877                                          ProcBindKind ProcBind,
2878                                          SourceLocation Loc) {
2879   if (!CGF.HaveInsertPoint())
2880     return;
2881   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2882   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2883   llvm::Value *Args[] = {
2884       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2885       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2886   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2887                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2888                       Args);
2889 }
2890 
2891 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2892                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2893   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2894     OMPBuilder.createFlush(CGF.Builder);
2895   } else {
2896     if (!CGF.HaveInsertPoint())
2897       return;
2898     // Build call void __kmpc_flush(ident_t *loc)
2899     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2900                             CGM.getModule(), OMPRTL___kmpc_flush),
2901                         emitUpdateLocation(CGF, Loc));
2902   }
2903 }
2904 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these constants are used as field indexes into the
/// kmp_task_t record built elsewhere in this file — keep the order in sync
/// with that record's field layout.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2930 
2931 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2932   return OffloadEntriesTargetRegion.empty() &&
2933          OffloadEntriesDeviceGlobalVar.empty();
2934 }
2935 
2936 /// Initialize target region entry.
2937 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2938     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2939                                     StringRef ParentName, unsigned LineNum,
2940                                     unsigned Order) {
2941   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2942                                              "only required for the device "
2943                                              "code generation.");
2944   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2945       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2946                                    OMPTargetRegionEntryTargetRegion);
2947   ++OffloadingEntriesNum;
2948 }
2949 
/// Register a target region entry, attaching its address, outlined-function
/// ID and flags. On the device the entry is expected to already exist (it is
/// pre-initialized from the host IR metadata); on the host a fresh entry is
/// created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                      OffloadingEntriesNum);
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, silently ignore a duplicate registration of a plain
    // target region entry (address/ID differences are ignored here).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
2980 
2981 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2982     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2983     bool IgnoreAddressId) const {
2984   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2985   if (PerDevice == OffloadEntriesTargetRegion.end())
2986     return false;
2987   auto PerFile = PerDevice->second.find(FileID);
2988   if (PerFile == PerDevice->second.end())
2989     return false;
2990   auto PerParentName = PerFile->second.find(ParentName);
2991   if (PerParentName == PerFile->second.end())
2992     return false;
2993   auto PerLine = PerParentName->second.find(LineNum);
2994   if (PerLine == PerParentName->second.end())
2995     return false;
2996   // Fail if this entry is already registered.
2997   if (!IgnoreAddressId &&
2998       (PerLine->second.getAddress() || PerLine->second.getID()))
2999     return false;
3000   return true;
3001 }
3002 
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3004     const OffloadTargetRegionEntryInfoActTy &Action) {
3005   // Scan all target region entries and perform the provided action.
3006   for (const auto &D : OffloadEntriesTargetRegion)
3007     for (const auto &F : D.second)
3008       for (const auto &P : F.second)
3009         for (const auto &L : P.second)
3010           Action(D.first, F.first, P.first(), L.first, L.second);
3011 }
3012 
3013 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3014     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3015                                        OMPTargetGlobalVarEntryKind Flags,
3016                                        unsigned Order) {
3017   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3018                                              "only required for the device "
3019                                              "code generation.");
3020   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3021   ++OffloadingEntriesNum;
3022 }
3023 
/// Register a 'declare target' global variable entry, recording its address,
/// size, flags and linkage. On the device side the entry may already exist
/// (pre-initialized from host IR metadata); on the host a new entry is
/// created on first sight.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // If the address was already recorded, only fill in size/linkage when
    // they are still unset; otherwise leave the entry untouched.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host compilation: update the existing entry in place, or create a new
    // one if this variable has not been registered before.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3064 
3065 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3066     actOnDeviceGlobalVarEntriesInfo(
3067         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3068   // Scan all target region entries and perform the provided action.
3069   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3070     Action(E.getKey(), E.getValue());
3071 }
3072 
3073 void CGOpenMPRuntime::createOffloadEntry(
3074     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3075     llvm::GlobalValue::LinkageTypes Linkage) {
3076   StringRef Name = Addr->getName();
3077   llvm::Module &M = CGM.getModule();
3078   llvm::LLVMContext &C = M.getContext();
3079 
3080   // Create constant string with the name.
3081   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3082 
3083   std::string StringName = getName({"omp_offloading", "entry_name"});
3084   auto *Str = new llvm::GlobalVariable(
3085       M, StrPtrInit->getType(), /*isConstant=*/true,
3086       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3087   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3088 
3089   llvm::Constant *Data[] = {
3090       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3091       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3092       llvm::ConstantInt::get(CGM.SizeTy, Size),
3093       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3094       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3095   std::string EntryName = getName({"omp_offloading", "entry", ""});
3096   llvm::GlobalVariable *Entry = createGlobalStruct(
3097       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3098       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3099 
3100   // The entry has to be created in the section the linker expects it to be.
3101   Entry->setSection("omp_offloading_entries");
3102 }
3103 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the two emitter
  // lambdas below and walked at the end to create the actual descriptors.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the source
        // manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the collected entries in creation order, diagnose invalid ones and
  // emit the __tgt_offload_entry descriptors for the valid ones.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Skip 'to' entries on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3277 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local, throwaway context; only the named
  // metadata node is consumed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout must
    // mirror what createOffloadEntriesAndInfoMetadata() emits.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3346 
3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348   if (!KmpRoutineEntryPtrTy) {
3349     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350     ASTContext &C = CGM.getContext();
3351     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352     FunctionProtoType::ExtProtoInfo EPI;
3353     KmpRoutineEntryPtrQTy = C.getPointerType(
3354         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356   }
3357 }
3358 
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  // The type is built lazily and cached in TgtOffloadEntryQTy. The field
  // order below defines the record layout, so it must stay in sync with the
  // struct above — presumably also with the offload runtime's expectations.
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Pack the record so there is no padding between the fields.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}
3387 
3388 namespace {
3389 struct PrivateHelpersTy {
3390   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3391                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3392       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3393         PrivateElemInit(PrivateElemInit) {}
3394   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3395   const Expr *OriginalRef = nullptr;
3396   const VarDecl *Original = nullptr;
3397   const VarDecl *PrivateCopy = nullptr;
3398   const VarDecl *PrivateElemInit = nullptr;
3399   bool isLocalPrivate() const {
3400     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3401   }
3402 };
3403 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3404 } // anonymous namespace
3405 
3406 static bool isAllocatableDecl(const VarDecl *VD) {
3407   const VarDecl *CVD = VD->getCanonicalDecl();
3408   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3409     return false;
3410   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3411   // Use the default allocation.
3412   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3413             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3414            !AA->getAllocator());
3415 }
3416 
3417 static RecordDecl *
3418 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3419   if (!Privates.empty()) {
3420     ASTContext &C = CGM.getContext();
3421     // Build struct .kmp_privates_t. {
3422     //         /*  private vars  */
3423     //       };
3424     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3425     RD->startDefinition();
3426     for (const auto &Pair : Privates) {
3427       const VarDecl *VD = Pair.second.Original;
3428       QualType Type = VD->getType().getNonReferenceType();
3429       // If the private variable is a local variable with lvalue ref type,
3430       // allocate the pointer instead of the pointee type.
3431       if (Pair.second.isLocalPrivate()) {
3432         if (VD->getType()->isLValueReferenceType())
3433           Type = C.getPointerType(Type);
3434         if (isAllocatableDecl(VD))
3435           Type = C.getPointerType(Type);
3436       }
3437       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3438       if (VD->hasAttrs()) {
3439         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3440              E(VD->getAttrs().end());
3441              I != E; ++I)
3442           FD->addAttr(*I);
3443       }
3444     }
3445     RD->completeDefinition();
3446     return RD;
3447   }
3448   return nullptr;
3449 }
3450 
3451 static RecordDecl *
3452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3453                          QualType KmpInt32Ty,
3454                          QualType KmpRoutineEntryPointerQTy) {
3455   ASTContext &C = CGM.getContext();
3456   // Build struct kmp_task_t {
3457   //         void *              shareds;
3458   //         kmp_routine_entry_t routine;
3459   //         kmp_int32           part_id;
3460   //         kmp_cmplrdata_t data1;
3461   //         kmp_cmplrdata_t data2;
3462   // For taskloops additional fields:
3463   //         kmp_uint64          lb;
3464   //         kmp_uint64          ub;
3465   //         kmp_int64           st;
3466   //         kmp_int32           liter;
3467   //         void *              reductions;
3468   //       };
3469   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3470   UD->startDefinition();
3471   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3472   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3473   UD->completeDefinition();
3474   QualType KmpCmplrdataTy = C.getRecordType(UD);
3475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3476   RD->startDefinition();
3477   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3478   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3479   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3480   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3481   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3482   if (isOpenMPTaskLoopDirective(Kind)) {
3483     QualType KmpUInt64Ty =
3484         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3485     QualType KmpInt64Ty =
3486         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3487     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3488     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3489     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3490     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3491     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3492   }
3493   RD->completeDefinition();
3494   return RD;
3495 }
3496 
3497 static RecordDecl *
3498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3499                                      ArrayRef<PrivateDataTy> Privates) {
3500   ASTContext &C = CGM.getContext();
3501   // Build struct kmp_task_t_with_privates {
3502   //         kmp_task_t task_data;
3503   //         .kmp_privates_t. privates;
3504   //       };
3505   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3506   RD->startDefinition();
3507   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3508   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3509     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3510   RD->completeDefinition();
3511   return RD;
3512 }
3513 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the signature:
  //   kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference 'tt'; 'Base' then addresses the embedded kmp_task_t, which is
  // the first field of kmp_task_t_with_privates.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed as the address of the field, not its value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the pointer type the outlined
  // task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass &tt->privates when the wrapper record actually has a privates field
  // (i.e. at least one variable was privatized); null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments shared by task and taskloop entries: gtid, &part_id, privates,
  // the privates-map thunk, and 'tt' itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb, ub, st, liter and reductions,
  // each loaded from the corresponding kmp_task_t field.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3628 
/// Emit the task destructor thunk with signature
///   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                   kmp_task_t_with_privates *restrict tt)
/// that destroys every field of tt's privates record whose type has a
/// non-trivial destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Build the two implicit parameters: gtid and the task pointer.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Address the privates record: the second field of
  // kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Register a destroy cleanup for every field with a non-trivial destruction
  // kind; the cleanups are emitted when the function scope is finished below.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3677 
3678 /// Emit a privates mapping function for correct handling of private and
3679 /// firstprivate variables.
3680 /// \code
3681 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3682 /// **noalias priv1,...,  <tyn> **noalias privn) {
3683 ///   *priv1 = &.privates.priv1;
3684 ///   ...;
3685 ///   *privn = &.privates.privn;
3686 /// }
3687 /// \endcode
3688 static llvm::Value *
3689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3690                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3691                                ArrayRef<PrivateDataTy> Privates) {
3692   ASTContext &C = CGM.getContext();
3693   FunctionArgList Args;
3694   ImplicitParamDecl TaskPrivatesArg(
3695       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3697       ImplicitParamDecl::Other);
3698   Args.push_back(&TaskPrivatesArg);
3699   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3700   unsigned Counter = 1;
3701   for (const Expr *E : Data.PrivateVars) {
3702     Args.push_back(ImplicitParamDecl::Create(
3703         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3704         C.getPointerType(C.getPointerType(E->getType()))
3705             .withConst()
3706             .withRestrict(),
3707         ImplicitParamDecl::Other));
3708     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3709     PrivateVarsPos[VD] = Counter;
3710     ++Counter;
3711   }
3712   for (const Expr *E : Data.FirstprivateVars) {
3713     Args.push_back(ImplicitParamDecl::Create(
3714         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3715         C.getPointerType(C.getPointerType(E->getType()))
3716             .withConst()
3717             .withRestrict(),
3718         ImplicitParamDecl::Other));
3719     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3720     PrivateVarsPos[VD] = Counter;
3721     ++Counter;
3722   }
3723   for (const Expr *E : Data.LastprivateVars) {
3724     Args.push_back(ImplicitParamDecl::Create(
3725         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3726         C.getPointerType(C.getPointerType(E->getType()))
3727             .withConst()
3728             .withRestrict(),
3729         ImplicitParamDecl::Other));
3730     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3731     PrivateVarsPos[VD] = Counter;
3732     ++Counter;
3733   }
3734   for (const VarDecl *VD : Data.PrivateLocals) {
3735     QualType Ty = VD->getType().getNonReferenceType();
3736     if (VD->getType()->isLValueReferenceType())
3737       Ty = C.getPointerType(Ty);
3738     if (isAllocatableDecl(VD))
3739       Ty = C.getPointerType(Ty);
3740     Args.push_back(ImplicitParamDecl::Create(
3741         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3742         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3743         ImplicitParamDecl::Other));
3744     PrivateVarsPos[VD] = Counter;
3745     ++Counter;
3746   }
3747   const auto &TaskPrivatesMapFnInfo =
3748       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3749   llvm::FunctionType *TaskPrivatesMapTy =
3750       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3751   std::string Name =
3752       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3753   auto *TaskPrivatesMap = llvm::Function::Create(
3754       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3755       &CGM.getModule());
3756   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3757                                     TaskPrivatesMapFnInfo);
3758   if (CGM.getLangOpts().Optimize) {
3759     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3760     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3761     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3762   }
3763   CodeGenFunction CGF(CGM);
3764   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3765                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3766 
3767   // *privi = &.privates.privi;
3768   LValue Base = CGF.EmitLoadOfPointerLValue(
3769       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3770       TaskPrivatesArg.getType()->castAs<PointerType>());
3771   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3772   Counter = 0;
3773   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3774     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3775     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3776     LValue RefLVal =
3777         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3778     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3779         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3780     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3781     ++Counter;
3782   }
3783   CGF.FinishFunction();
3784   return TaskPrivatesMap;
3785 }
3786 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's shareds block; may be
///        invalid (see the condition guarding SrcBase below).
/// \param TDBase LValue of the kmp_task_t_with_privates instance whose
///        privates field gets initialized.
/// \param ForDup true when emitting from the task 'dup' function (taskloop
///        duplication), false for the initial task setup.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor-based
    // initializers are re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit set: initialize the copy from the shared original
      // (firstprivate-style element initialization).
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Rebuild the lvalue with the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda captures and blocks: emit the reference expression as-is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class element: privatize Elem to the shared address, then
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init source: emit the private copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3908 
3909 /// Check if duplication function is required for taskloops.
3910 static bool checkInitIsRequired(CodeGenFunction &CGF,
3911                                 ArrayRef<PrivateDataTy> Privates) {
3912   bool InitRequired = false;
3913   for (const PrivateDataTy &Pair : Privates) {
3914     if (Pair.second.isLocalPrivate())
3915       continue;
3916     const VarDecl *VD = Pair.second.PrivateCopy;
3917     const Expr *Init = VD->getAnyInitializer();
3918     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3919                                     !CGF.isTrivialInitializer(Init));
3920     if (InitRequired)
3921       break;
3922   }
3923   return InitRequired;
3924 }
3925 
3926 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the signature: void (kmp_task_t_with_privates *task_dst,
  //                            kmp_task_t_with_privates *task_src,
  //                            int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task pointer.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the source task's shareds block, so load
  // its pointer from task_src.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4005 
4006 /// Checks if destructor function is required to be generated.
4007 /// \return true if cleanups are required, false otherwise.
4008 static bool
4009 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4010                          ArrayRef<PrivateDataTy> Privates) {
4011   for (const PrivateDataTy &P : Privates) {
4012     if (P.second.isLocalPrivate())
4013       continue;
4014     QualType Ty = P.second.Original->getType().getNonReferenceType();
4015     if (Ty.isDestructedType())
4016       return true;
4017   }
4018   return false;
4019 }
4020 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator/counter variables of
/// \p E and emits the loop headers (counter init, bound check, iterator
/// update) for each iterator; the destructor emits the matching counter
/// increments, back-branches and exit blocks in reverse order. Code emitted
/// while the scope is alive therefore runs inside the nest of iterator loops.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the loops; a null expression makes the whole
  // scope a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator "continue" (loop-header) jump destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator "exit" jump destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits privatization of the iterator/counter variables followed by the
  /// header of every iterator loop.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bounds up front, before privatization takes
      // effect.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit one loop header per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Choose signed vs unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4099 
/// Computes the base address and size in bytes (as size_t values) of the
/// storage denoted by \p E:
///  - array-shaping expression: base pointer; size = sizeof(pointee) *
///    product of all dimensions;
///  - array-section expression: section start; size = (one past the section's
///    upper element) - start, in bytes;
///  - anything else: the expression's lvalue address and sizeof its type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ... (each dimension converted
    // to size_t first; multiplications are no-unsigned-wrap).
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Upper bound address: one element past the section's last element.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4134 
4135 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4136 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4137   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4138   if (KmpTaskAffinityInfoTy.isNull()) {
4139     RecordDecl *KmpAffinityInfoRD =
4140         C.buildImplicitRecord("kmp_task_affinity_info_t");
4141     KmpAffinityInfoRD->startDefinition();
4142     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4143     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4144     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4145     KmpAffinityInfoRD->completeDefinition();
4146     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4147   }
4148 }
4149 
/// Allocates and initializes a kmp_task_t object for the task-generating
/// directive \p D: collects and sorts the private copies, builds the
/// kmp_task_t-with-privates record, emits the proxy task entry, calls
/// __kmpc_omp_task_alloc (or the target variant), handles detach/affinity
/// clauses, copies shareds, initializes privates, and fills in the
/// destructors/priority fields. Returns the pieces the caller needs to emit
/// the actual task-spawning runtime call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-initializer variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment; stable sort keeps declaration order among
  // equally-aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates-map function type is taken from TaskFunction's 4th argument.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final() may be a runtime condition (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully constant case: a fixed-size local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, seeded with
    // the number of entries already emitted above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk copies also get a task-duplication
    // function.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4539 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these numeric values are stored directly into the flags
/// field of kmp_depend_info entries handed to the runtime (see
/// emitDependData), so they presumably mirror the runtime's kmp.h encoding
/// and must not be renumbered — confirm against the OpenMP runtime headers.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record, in declaration order:
/// base address, length in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4550 
4551 /// Translates internal dependency kind into the runtime kind.
4552 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4553   RTLDependenceKindTy DepKind;
4554   switch (K) {
4555   case OMPC_DEPEND_in:
4556     DepKind = DepIn;
4557     break;
4558   // Out and InOut dependencies must use the same code.
4559   case OMPC_DEPEND_out:
4560   case OMPC_DEPEND_inout:
4561     DepKind = DepInOut;
4562     break;
4563   case OMPC_DEPEND_mutexinoutset:
4564     DepKind = DepMutexInOutSet;
4565     break;
4566   case OMPC_DEPEND_source:
4567   case OMPC_DEPEND_sink:
4568   case OMPC_DEPEND_depobj:
4569   case OMPC_DEPEND_unknown:
4570     llvm_unreachable("Unknown task dependence type");
4571   }
4572   return DepKind;
4573 }
4574 
4575 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4576 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4577                            QualType &FlagsTy) {
4578   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4579   if (KmpDependInfoTy.isNull()) {
4580     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4581     KmpDependInfoRD->startDefinition();
4582     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4583     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4584     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4585     KmpDependInfoRD->completeDefinition();
4586     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4587   }
4588 }
4589 
/// Returns the number of dependence entries stored in the depobj referenced
/// by \p DepobjLVal together with a typed lvalue for the first
/// kmp_depend_info element of that depobj.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the depobj handle (a void* pointing at the first entry) and retype
  // it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element at index -1 (just before the first entry) acts as a header:
  // its base_addr field holds the number of dependence entries that follow.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4618 
/// Emits one kmp_depend_info entry (base_addr, len, flags) into
/// \p DependenciesArray for every expression in \p Data.DepExprs, running
/// under the depend clause's iterator loops (if any). \p Pos is the insertion
/// cursor: either a compile-time index (unsigned*) incremented in place, or a
/// runtime counter lvalue (LValue*) that is loaded and incremented per
/// emitted element.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the entry emission in the iterator loops when the clause has an
  // iterator modifier; a null expression makes this scope a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time cursor: index the array with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime cursor: load the current index from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the cursor, whichever form it takes.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4677 
4678 static SmallVector<llvm::Value *, 4>
4679 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4680                         const OMPTaskDataTy::DependData &Data) {
4681   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4682          "Expected depobj dependecy kind.");
4683   SmallVector<llvm::Value *, 4> Sizes;
4684   SmallVector<LValue, 4> SizeLVals;
4685   ASTContext &C = CGF.getContext();
4686   QualType FlagsTy;
4687   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4688   RecordDecl *KmpDependInfoRD =
4689       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4690   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4691   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4692   {
4693     OMPIteratorGeneratorScope IteratorScope(
4694         CGF, cast_or_null<OMPIteratorExpr>(
4695                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4696                                    : nullptr));
4697     for (const Expr *E : Data.DepExprs) {
4698       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4699       LValue Base = CGF.EmitLoadOfPointerLValue(
4700           DepobjLVal.getAddress(CGF),
4701           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4702       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4703           Base.getAddress(CGF), KmpDependInfoPtrT);
4704       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4705                                 Base.getTBAAInfo());
4706       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4707           Addr.getPointer(),
4708           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4709       LValue NumDepsBase = CGF.MakeAddrLValue(
4710           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4711           Base.getBaseInfo(), Base.getTBAAInfo());
4712       // NumDeps = deps[i].base_addr;
4713       LValue BaseAddrLVal = CGF.EmitLValueForField(
4714           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4715       llvm::Value *NumDeps =
4716           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4717       LValue NumLVal = CGF.MakeAddrLValue(
4718           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4719           C.getUIntPtrType());
4720       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4721                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4722       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4723       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4724       CGF.EmitStoreOfScalar(Add, NumLVal);
4725       SizeLVals.push_back(NumLVal);
4726     }
4727   }
4728   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4729     llvm::Value *Size =
4730         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4731     Sizes.push_back(Size);
4732   }
4733   return Sizes;
4734 }
4735 
4736 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4737                                LValue PosLVal,
4738                                const OMPTaskDataTy::DependData &Data,
4739                                Address DependenciesArray) {
4740   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4741          "Expected depobj dependecy kind.");
4742   ASTContext &C = CGF.getContext();
4743   QualType FlagsTy;
4744   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4745   RecordDecl *KmpDependInfoRD =
4746       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4747   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4748   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4749   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4750   {
4751     OMPIteratorGeneratorScope IteratorScope(
4752         CGF, cast_or_null<OMPIteratorExpr>(
4753                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4754                                    : nullptr));
4755     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4756       const Expr *E = Data.DepExprs[I];
4757       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4758       LValue Base = CGF.EmitLoadOfPointerLValue(
4759           DepobjLVal.getAddress(CGF),
4760           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4761       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4762           Base.getAddress(CGF), KmpDependInfoPtrT);
4763       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4764                                 Base.getTBAAInfo());
4765 
4766       // Get number of elements in a single depobj.
4767       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4768           Addr.getPointer(),
4769           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4770       LValue NumDepsBase = CGF.MakeAddrLValue(
4771           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4772           Base.getBaseInfo(), Base.getTBAAInfo());
4773       // NumDeps = deps[i].base_addr;
4774       LValue BaseAddrLVal = CGF.EmitLValueForField(
4775           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4776       llvm::Value *NumDeps =
4777           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4778 
4779       // memcopy dependency data.
4780       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4781           ElSize,
4782           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4783       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4784       Address DepAddr =
4785           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4786                   DependenciesArray.getAlignment());
4787       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4788 
4789       // Increase pos.
4790       // pos += size;
4791       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4792       CGF.EmitStoreOfScalar(Add, PosLVal);
4793     }
4794   }
4795 }
4796 
/// Builds the kmp_depend_info array for a task's depend clauses and returns
/// {number of elements (i32), array address cast to void*}. Returns
/// {nullptr, invalid} when no dependency has any expression. Regular deps are
/// emitted first, then regular deps with iterators, then the contents of
/// depobj arrays are memcpy'd in. If any dependency count is only known at
/// runtime (depobj or iterators), the array is a VLA; otherwise a fixed-size
/// temporary is used.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only regular (non-depobj, non-iterator) dependencies
  // are statically countable.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators (both are runtime values).
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        // Total iteration space is the product of all iterator upper bounds.
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: sum the static count with the
    // runtime depobj/iterator counts and emit a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // All counts are static: use a fixed-size local temporary.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First fill in regular dependencies (static position counter).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators; these need a runtime position
  // counter, seeded with the static count written so far.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4917 
/// Builds the dependency array for an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading element whose
/// base_addr field stores the number of dependency records (needed later by
/// depobj(x) update(in) and destroy). Returns the address of the first real
/// record, i.e. one element past the header, cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With iterators the count is the runtime product of all iterator upper
    // bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element holding the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: allocate NumDependencies + 1 records (extra header).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Records start at index 1; with iterators the position must be a runtime
  // counter, otherwise a plain unsigned suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the header element (the first real record).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5000 
/// Emits the 'destroy' clause of an 'omp depobj' construct: frees the
/// heap-allocated dependency array via __kmpc_free. The depobj variable points
/// one element past the allocation's header (see emitDepobjDependClause), so
/// the pointer is rewound by one element to recover the original allocation
/// address before freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to index -1: the true start of the allocation (header element).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5027 
/// Emits the 'update' clause of an 'omp depobj' construct: rewrites the flags
/// field of every kmp_depend_info record in the depobj array to the new
/// dependency kind \p NewDepKind. Generates a do-while style loop over the
/// NumDeps records (NumDeps is read from the depobj's hidden header via
/// getDepobjElements).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; second incoming value added below
  // once the loop latch block is known.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5073 
/// Emits the runtime calls for an OpenMP 'task' directive. After initializing
/// the task object via emitTaskInit and materializing the dependency array
/// (emitDependClause), the task is either enqueued through
/// __kmpc_omp_task[_with_deps] (the 'then' path) or, when the if-clause
/// evaluates to false, executed immediately in the encountering thread via
/// __kmpc_omp_wait_deps + __kmpc_omp_task_begin_if0/complete_if0 around a
/// direct call of the task entry (the 'else' path).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part_id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch (if-clause false): run the task immediately, undeferred.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if-clause: always take the 'then' (enqueue) path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5191 
/// Emits the runtime call for an OpenMP 'taskloop' directive: initializes the
/// task object via emitTaskInit, stores the loop bounds, stride, and reduction
/// data into the task's kmp_task_t fields, then calls __kmpc_taskloop. Unlike
/// emitTaskCall, the if-clause is passed to the runtime as an integer argument
/// rather than generating two code paths.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the loop lower bound into the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop upper bound into the task descriptor.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop stride into the task descriptor.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling mode for the 'sched' argument, per the taskloop runtime ABI.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5277 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates a pointer-walking loop over all elements of \p Type, privatizes
/// \p LHSVar / \p RHSVar to the current element addresses, and invokes
/// \p RedOpGen once per element.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current element so
  // RedOpGen operates on single elements, not the whole arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5357 
5358 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5359 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5360 /// UDR combiner function.
5361 static void emitReductionCombiner(CodeGenFunction &CGF,
5362                                   const Expr *ReductionOp) {
5363   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5364     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5365       if (const auto *DRE =
5366               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5367         if (const auto *DRD =
5368                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5369           std::pair<llvm::Function *, llvm::Function *> Reduction =
5370               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5371           RValue Func = RValue::get(Reduction.first);
5372           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5373           CGF.EmitIgnoredExpr(ReductionOp);
5374           return;
5375         }
5376   CGF.EmitIgnoredExpr(ReductionOp);
5377 }
5378 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each lhs/rhs variable to the matching slot of the void* array
  // arguments. Variably-modified privates consume an extra slot (Idx is bumped
  // separately from I) that holds the VLA size, which is bound to the size
  // expression via an OpaqueValueMapping before re-emitting the VLA type.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each reduction item; array-typed privates are
  // combined element-by-element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5470 
5471 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5472                                                   const Expr *ReductionOp,
5473                                                   const Expr *PrivateRef,
5474                                                   const DeclRefExpr *LHS,
5475                                                   const DeclRefExpr *RHS) {
5476   if (PrivateRef->getType()->isArrayType()) {
5477     // Emit reduction for array section.
5478     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5479     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5480     EmitOMPAggregateReduction(
5481         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5482         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5483           emitReductionCombiner(CGF, ReductionOp);
5484         });
5485   } else {
5486     // Emit reduction for array subscript or single variable.
5487     emitReductionCombiner(CGF, ReductionOp);
5488   }
5489 }
5490 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  // Simple reduction: combine directly into the LHS variables without any
  // runtime coordination.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Variably-modified privates get an extra slot for their runtime size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The VLA element count is smuggled through the void*
      // slot via inttoptr; the reduction function reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1: the runtime chose the non-atomic path.
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2: the runtime requested atomic combination.
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = x op e' reduction ops into X (target lvalue), E (the
      // rhs operand) and Up (the full update expression) so they can be
      // emitted as a simple atomic update when possible.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for non-atomic-friendly updates: store the loaded
                // value into a temporary, remap VD to it and re-evaluate the
                // update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5797 
5798 /// Generates unique name for artificial threadprivate variables.
5799 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5800 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5801                                       const Expr *Ref) {
5802   SmallString<256> Buffer;
5803   llvm::raw_svector_ostream Out(Buffer);
5804   const clang::DeclRefExpr *DE;
5805   const VarDecl *D = ::getBaseDecl(Ref, DE);
5806   if (!D)
5807     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5808   D = D->getCanonicalDecl();
5809   std::string Name = CGM.getOpenMPRuntime().getName(
5810       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5811   Out << Prefix << Name << "_"
5812       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5813   return std::string(Out.str());
5814 }
5815 
5816 /// Emits reduction initializer function:
5817 /// \code
5818 /// void @.red_init(void* %arg, void* %orig) {
5819 /// %0 = bitcast void* %arg to <type>*
5820 /// store <type> <init>, <type>* %0
5821 /// ret void
5822 /// }
5823 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void* pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private reduction item to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer); otherwise pass a null pointer.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5884 
5885 /// Emits reduction combiner function:
5886 /// \code
5887 /// void @.red_comb(void* %arg0, void* %arg1) {
5888 /// %lhs = bitcast void* %arg0 to <type>*
5889 /// %rhs = bitcast void* %arg1 to <type>*
5890 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5891 /// store <type> %2, <type>* %lhs
5892 /// ret void
5893 /// }
5894 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5962 
5963 /// Emits reduction finalizer function:
5964 /// \code
5965 /// void @.red_fini(void* %arg) {
5966 /// %0 = bitcast void* %arg to <type>*
5967 /// <destroy>(<type>* %0)
5968 /// ret void
5969 /// }
5970 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed if the reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private reduction item to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6011 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  // RCG emits the per-item shared/orig lvalues and the init/fini/comb helper
  // functions referenced below.
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // emitReduceFiniFunction returns null when no cleanups are needed; store
    // a null function pointer in that case.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests delayed (lazy) creation for VLA/array-section
      // items; the sizes are communicated through threadprivate globals.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6140 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  // (Previous comment described the *_init entry point; this is the fini.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6158 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr). The stored size is read back by the generated
  // init/fini helpers, which cannot receive it as an argument.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6175 
6176 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6177                                               SourceLocation Loc,
6178                                               llvm::Value *ReductionsPtr,
6179                                               LValue SharedLVal) {
6180   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6181   // *d);
6182   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6183                                                    CGM.IntTy,
6184                                                    /*isSigned=*/true),
6185                          ReductionsPtr,
6186                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6187                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6188   return Address(
6189       CGF.EmitRuntimeCall(
6190           OMPBuilder.getOrCreateRuntimeFunction(
6191               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6192           Args),
6193       SharedLVal.getAlignment());
6194 }
6195 
6196 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6197                                        SourceLocation Loc) {
6198   if (!CGF.HaveInsertPoint())
6199     return;
6200 
6201   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6202     OMPBuilder.createTaskwait(CGF.Builder);
6203   } else {
6204     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6205     // global_tid);
6206     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6207     // Ignore return result until untied tasks are supported.
6208     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6209                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6210                         Args);
6211   }
6212 
6213   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6214     Region->emitUntiedSwitch(CGF);
6215 }
6216 
6217 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6218                                            OpenMPDirectiveKind InnerKind,
6219                                            const RegionCodeGenTy &CodeGen,
6220                                            bool HasCancel) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6224                                  InnerKind != OMPD_critical &&
6225                                      InnerKind != OMPD_master);
6226   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6227 }
6228 
namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint (see emitCancelCall/emitCancellationPointCall).
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel a parallel region
  CancelLoop = 2,      // cancel a worksharing loop ('for')
  CancelSections = 3,  // cancel a sections construct
  CancelTaskgroup = 4  // cancel a taskgroup
};
} // anonymous namespace
6238 
6239 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6240   RTCancelKind CancelKind = CancelNoreq;
6241   if (CancelRegion == OMPD_parallel)
6242     CancelKind = CancelParallel;
6243   else if (CancelRegion == OMPD_for)
6244     CancelKind = CancelLoop;
6245   else if (CancelRegion == OMPD_sections)
6246     CancelKind = CancelSections;
6247   else {
6248     assert(CancelRegion == OMPD_taskgroup);
6249     CancelKind = CancelTaskgroup;
6250   }
6251   return CancelKind;
6252 }
6253 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was requested: branch out of the
      // construct through its cleanup destination; otherwise fall through.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6290 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel; it is either emitted unconditionally or
    // guarded by the 'if' clause condition below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation is active: leave the construct
      // through its cleanup destination; otherwise continue normally.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause the cancel is performed only when the condition
      // evaluates to true; the else branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6333 
6334 namespace {
6335 /// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6337   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6338 
6339 public:
6340   OMPUsesAllocatorsActionTy(
6341       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6342       : Allocators(Allocators) {}
6343   void Enter(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6348           CGF, AllocatorData.first, AllocatorData.second);
6349     }
6350   }
6351   void Exit(CodeGenFunction &CGF) override {
6352     if (!CGF.HaveInsertPoint())
6353       return;
6354     for (const auto &AllocatorData : Allocators) {
6355       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6356                                                         AllocatorData.first);
6357     }
6358   }
6359 };
6360 } // namespace
6361 
6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363     const OMPExecutableDirective &D, StringRef ParentName,
6364     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366   assert(!ParentName.empty() && "Invalid target region parent name!");
6367   HasEmittedTargetRegion = true;
6368   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372       if (!D.AllocatorTraits)
6373         continue;
6374       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375     }
6376   }
6377   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378   CodeGen.setAction(UsesAllocatorAction);
6379   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380                                    IsOffloadEntry, CodeGen);
6381 }
6382 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Emits __kmpc_init_allocator(gtid, memspace, ntraits, traits) and stores
  // the returned handle into the allocator variable.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The traits expression is a constant-size array; its element count is the
  // number of traits passed to the runtime.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable is declared here first, then the runtime's void*
  // result is converted to the variable's type and stored into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6417 
6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419                                              const Expr *Allocator) {
6420   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423   llvm::Value *AllocatorVal =
6424       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426                                           CGF.getContext().VoidPtrTy,
6427                                           Allocator->getExprLoc());
6428   (void)CGF.EmitRuntimeCall(
6429       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6430                                             OMPRTL___kmpc_destroy_allocator),
6431       {ThreadId, AllocatorVal});
6432 }
6433 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region body into a function named EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN device entry points must use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a unique 1-byte global; its address identifies the
    // region at runtime.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6502 
6503 /// Checks if the expression is constant or does not have non-trivial function
6504 /// calls.
6505 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6506   // We can skip constant expressions.
6507   // We can skip expressions with trivial calls or simple expressions.
6508   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6509           !E->hasNonTrivialCall(Ctx)) &&
6510          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6511 }
6512 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walks nested compound statements looking for exactly one "interesting"
  // child; returns nullptr if there are several, or the child itself.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constants, no non-trivial calls) are skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is one of the
        // benign kinds below, or a variable of trivial/reference type with a
        // trivial (or absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child in case it is itself a compound.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6557 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the teams directive (if any) is nested in the captured
    // body, so inspect the single meaningful child statement.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams with num_teams: evaluate the clause expression.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams without num_teams: 0 lets the runtime pick the default.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd without teams: a single team is used.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive found: number of teams is unknown here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if present) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These have no teams construct: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives
  // and must not reach this function (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6690 
/// Inspect the single directive nested inside the captured statement \p CS
/// and compute the number of threads to use for it on the host.
///
/// \param DefaultThreadLimitVal Value derived from an enclosing thread_limit
///        clause, or null if there is none.
/// \return For a nested parallel directive, the evaluated num_threads value
///         (clamped by \p DefaultThreadLimitVal and guarded by the if
///         clause); the i32 constant 1 for a nested simd directive; otherwise
///         \p DefaultThreadLimitVal if the child is some other directive, or
///         \p DefaultThreadLimitVal-or-0 when there is no nested directive
///         (0 meaning "let the runtime choose").
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the first if clause that applies to 'parallel' (either the
        // explicit parallel name modifier or no modifier at all).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // The condition folds to a constant: a false condition means the
            // parallel region runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              // Emit the clause's pre-init declarations; decls marked with
              // OMPCaptureNoInitAttr are only allocated, not initialized.
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          // Same pre-init emission as for the if clause above.
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the enclosing thread_limit:
        // min(thread_limit, num_threads), using an unsigned comparison.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6782 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Must only be called on the host (asserted below): the emitted clause
/// expressions may reference host-side pre-init declarations.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from whatever single construct
    // is nested inside it.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested construct may carry a thread_limit clause of its own.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          // Emit the clause's pre-init declarations; decls marked with
          // OMPCaptureNoInitAttr are only allocated, not initialized.
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams construct that is not combined with distribute, step into
      // its captured statement to look at the next nested directive.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested non-simd distribute: look inside for a parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // 0 tells the runtime to pick the default number of threads.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain distribute: look inside for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the first if clause applicable to 'parallel' (explicit modifier
      // or none).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) via unsigned comparison.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive, so reaching this function with one of them is a bug.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7007 
7008 namespace {
7009 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7010 
7011 // Utility to handle information from clauses associated with a given
7012 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7013 // It provides a convenient interface to obtain the information and generate
7014 // code for that information.
7015 class MappableExprsHandler {
7016 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These flags are combined per map entry and passed to the
  /// offload runtime, so individual bits can be OR-ed together.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The field index is stored there; see
    /// getFlagMemberOffset() for the bit offset of this field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Enable the bitwise operators (|, &, ...) for this enum.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7064 
7065   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066   static unsigned getFlagMemberOffset() {
7067     unsigned Offset = 0;
7068     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069          Remain = Remain >> 1)
7070       Offset++;
7071     return Offset;
7072   }
7073 
7074   /// Class that holds debugging information for a data mapping to be passed to
7075   /// the runtime library.
7076   class MappingExprInfo {
7077     /// The variable declaration used for the data mapping.
7078     const ValueDecl *MapDecl = nullptr;
7079     /// The original expression used in the map clause, or null if there is
7080     /// none.
7081     const Expr *MapExpr = nullptr;
7082 
7083   public:
7084     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7085         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7086 
7087     const ValueDecl *getMapDecl() const { return MapDecl; }
7088     const Expr *getMapExpr() const { return MapExpr; }
7089   };
7090 
7091   /// Class that associates information with a base pointer to be passed to the
7092   /// runtime library.
7093   class BasePointerInfo {
7094     /// The base pointer.
7095     llvm::Value *Ptr = nullptr;
7096     /// The base declaration that refers to this device pointer, or null if
7097     /// there is none.
7098     const ValueDecl *DevPtrDecl = nullptr;
7099 
7100   public:
7101     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7102         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7103     llvm::Value *operator*() const { return Ptr; }
7104     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7105     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7106   };
7107 
7108   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7109   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7110   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7111   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7112   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7113   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7114   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7115 
7116   /// This structure contains combined information generated for mappable
7117   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7118   /// mappers, and non-contiguous information.
7119   struct MapCombinedInfoTy {
7120     struct StructNonContiguousInfo {
7121       bool IsNonContiguous = false;
7122       MapDimArrayTy Dims;
7123       MapNonContiguousArrayTy Offsets;
7124       MapNonContiguousArrayTy Counts;
7125       MapNonContiguousArrayTy Strides;
7126     };
7127     MapExprsArrayTy Exprs;
7128     MapBaseValuesArrayTy BasePointers;
7129     MapValuesArrayTy Pointers;
7130     MapValuesArrayTy Sizes;
7131     MapFlagsArrayTy Types;
7132     MapMappersArrayTy Mappers;
7133     StructNonContiguousInfo NonContigInfo;
7134 
7135     /// Append arrays in \a CurInfo.
7136     void append(MapCombinedInfoTy &CurInfo) {
7137       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7138       BasePointers.append(CurInfo.BasePointers.begin(),
7139                           CurInfo.BasePointers.end());
7140       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7141       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7142       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7143       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7144       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7145                                  CurInfo.NonContigInfo.Dims.end());
7146       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7147                                     CurInfo.NonContigInfo.Offsets.end());
7148       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7149                                    CurInfo.NonContigInfo.Counts.end());
7150       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7151                                     CurInfo.NonContigInfo.Strides.end());
7152     }
7153   };
7154 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct being mapped.
    Address Base = Address::invalid();
    /// True if one of the mapped elements is an array section.
    bool IsArraySection = false;
  };
7167 
7168 private:
  /// Information extracted for a single mappable-expression component list:
  /// the components themselves plus the map type, its modifiers, and flags
  /// controlling how the entry is emitted (including whether a device pointer
  /// has to be returned for it).
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if the mapping was not written explicitly by the user.
    bool IsImplicit = false;
    // User-defined mapper attached to this entry, if any.
    const ValueDecl *Mapper = nullptr;
    // Original clause expression, kept for debugging information.
    const Expr *VarRef = nullptr;
    // True if the entry comes from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7195 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression from the use_device_ptr/use_device_addr clause.
    const Expr *IE = nullptr;
    /// Declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7208 
7209   /// The target directive from where the mappable clauses were extracted. It
7210   /// is either a executable directive or a user-defined mapper directive.
7211   llvm::PointerUnion<const OMPExecutableDirective *,
7212                      const OMPDeclareMapperDecl *>
7213       CurDir;
7214 
7215   /// Function the directive is being generated for.
7216   CodeGenFunction &CGF;
7217 
7218   /// Set of all first private variables in the current directive.
7219   /// bool data is set to true if the variable is implicitly marked as
7220   /// firstprivate, false otherwise.
7221   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7222 
7223   /// Map between device pointer declarations and their expression components.
7224   /// The key value for declarations in 'this' is null.
7225   llvm::DenseMap<
7226       const ValueDecl *,
7227       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7228       DevPointersMap;
7229 
  /// Compute the number of bytes to be mapped for expression \p E.
  ///
  /// Handles array shaping expressions and array sections (including
  /// sections with only a lower bound) specially; reference types are sized
  /// by their pointee; everything else uses the expression's own type size.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // size = sizeof(pointee) * dim0 * dim1 * ... (unsigned, no-wrap).
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the pointer or array base type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when the lower bound points at or past the end of the
      // base (unsigned comparison and subtraction).
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7304 
7305   /// Return the corresponding bits for a given map clause modifier. Add
7306   /// a flag marking the map as a pointer if requested. Add a flag marking the
7307   /// map as the first one of a series of maps that relate to the same map
7308   /// expression.
7309   OpenMPOffloadMappingFlags getMapTypeBits(
7310       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7311       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7312       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7313     OpenMPOffloadMappingFlags Bits =
7314         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7315     switch (MapType) {
7316     case OMPC_MAP_alloc:
7317     case OMPC_MAP_release:
7318       // alloc and release is the default behavior in the runtime library,  i.e.
7319       // if we don't pass any bits alloc/release that is what the runtime is
7320       // going to do. Therefore, we don't need to signal anything for these two
7321       // type modifiers.
7322       break;
7323     case OMPC_MAP_to:
7324       Bits |= OMP_MAP_TO;
7325       break;
7326     case OMPC_MAP_from:
7327       Bits |= OMP_MAP_FROM;
7328       break;
7329     case OMPC_MAP_tofrom:
7330       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7331       break;
7332     case OMPC_MAP_delete:
7333       Bits |= OMP_MAP_DELETE;
7334       break;
7335     case OMPC_MAP_unknown:
7336       llvm_unreachable("Unexpected map type!");
7337     }
7338     if (AddPtrFlag)
7339       Bits |= OMP_MAP_PTR_AND_OBJ;
7340     if (AddIsTargetParamFlag)
7341       Bits |= OMP_MAP_TARGET_PARAM;
7342     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7343         != MapModifiers.end())
7344       Bits |= OMP_MAP_ALWAYS;
7345     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7346         != MapModifiers.end())
7347       Bits |= OMP_MAP_CLOSE;
7348     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7349         != MapModifiers.end())
7350       Bits |= OMP_MAP_PRESENT;
7351     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7352         != MotionModifiers.end())
7353       Bits |= OMP_MAP_PRESENT;
7354     if (IsNonContiguous)
7355       Bits |= OMP_MAP_NON_CONTIG;
7356     return Bits;
7357   }
7358 
7359   /// Return true if the provided expression is a final array section. A
7360   /// final array section, is one whose length can't be proved to be one.
7361   bool isFinalArraySectionExpression(const Expr *E) const {
7362     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7363 
7364     // It is not an array section and therefore not a unity-size one.
7365     if (!OASE)
7366       return false;
7367 
7368     // An array section with no colon always refer to a single element.
7369     if (OASE->getColonLocFirst().isInvalid())
7370       return false;
7371 
7372     const Expr *Length = OASE->getLength();
7373 
7374     // If we don't have a length we have to check if the array has size 1
7375     // for this dimension. Also, we should always expect a length if the
7376     // base type is pointer.
7377     if (!Length) {
7378       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7379                              OASE->getBase()->IgnoreParenImpCasts())
7380                              .getCanonicalType();
7381       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7382         return ATy->getSize().getSExtValue() != 1;
7383       // If we don't have a constant dimension length, we have to consider
7384       // the current section as having any size, so it is not necessarily
7385       // unitary. If it happen to be unity size, that's user fault.
7386       return true;
7387     }
7388 
7389     // Check if the length evaluates to 1.
7390     Expr::EvalResult Result;
7391     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7392       return true; // Can have more that size 1.
7393 
7394     llvm::APSInt ConstLength = Result.Val.getInt();
7395     return ConstLength.getSExtValue() != 1;
7396   }
7397 
7398   /// Generate the base pointers, section pointers, sizes, map type bits, and
7399   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7400   /// map type, map or motion modifiers, and expression components.
7401   /// \a IsFirstComponent should be set to true if the provided set of
7402   /// components is the first associated with a capture.
7403   void generateInfoForComponentList(
7404       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7405       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7406       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7407       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7408       bool IsFirstComponentList, bool IsImplicit,
7409       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7410       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7411       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7412           OverlappedElements = llvm::None) const {
7413     // The following summarizes what has to be generated for each map and the
7414     // types below. The generated information is expressed in this order:
7415     // base pointer, section pointer, size, flags
7416     // (to add to the ones that come from the map type and modifier).
7417     //
7418     // double d;
7419     // int i[100];
7420     // float *p;
7421     //
7422     // struct S1 {
7423     //   int i;
7424     //   float f[50];
7425     // }
7426     // struct S2 {
7427     //   int i;
7428     //   float f[50];
7429     //   S1 s;
7430     //   double *p;
7431     //   struct S2 *ps;
7432     // }
7433     // S2 s;
7434     // S2 *ps;
7435     //
7436     // map(d)
7437     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7438     //
7439     // map(i)
7440     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7441     //
7442     // map(i[1:23])
7443     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7444     //
7445     // map(p)
7446     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7447     //
7448     // map(p[1:24])
7449     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7450     // in unified shared memory mode or for local pointers
7451     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7452     //
7453     // map(s)
7454     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7455     //
7456     // map(s.i)
7457     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7458     //
7459     // map(s.s.f)
7460     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7461     //
7462     // map(s.p)
7463     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7464     //
7465     // map(to: s.p[:22])
7466     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7467     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7468     // &(s.p), &(s.p[0]), 22*sizeof(double),
7469     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7470     // (*) alloc space for struct members, only this is a target parameter
7471     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7472     //      optimizes this entry out, same in the examples below)
7473     // (***) map the pointee (map: to)
7474     //
7475     // map(s.ps)
7476     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7477     //
7478     // map(from: s.ps->s.i)
7479     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7480     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7481     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7482     //
7483     // map(to: s.ps->ps)
7484     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7485     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7486     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7487     //
7488     // map(s.ps->ps->ps)
7489     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7490     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7491     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7492     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7493     //
7494     // map(to: s.ps->ps->s.f[:22])
7495     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7496     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7497     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7498     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7499     //
7500     // map(ps)
7501     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7502     //
7503     // map(ps->i)
7504     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7505     //
7506     // map(ps->s.f)
7507     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7508     //
7509     // map(from: ps->p)
7510     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7511     //
7512     // map(to: ps->p[:22])
7513     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7514     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7515     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7516     //
7517     // map(ps->ps)
7518     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7519     //
7520     // map(from: ps->ps->s.i)
7521     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7522     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7523     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7524     //
7525     // map(from: ps->ps->ps)
7526     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7527     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7528     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7529     //
7530     // map(ps->ps->ps->ps)
7531     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7532     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7533     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7534     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7535     //
7536     // map(to: ps->ps->ps->s.f[:22])
7537     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7538     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7539     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7540     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7541     //
7542     // map(to: s.f[:22]) map(from: s.p[:33])
7543     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7544     //     sizeof(double*) (**), TARGET_PARAM
7545     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7546     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7547     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7548     // (*) allocate contiguous space needed to fit all mapped members even if
7549     //     we allocate space for members not mapped (in this example,
7550     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7551     //     them as well because they fall between &s.f[0] and &s.p)
7552     //
7553     // map(from: s.f[:22]) map(to: ps->p[:33])
7554     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7555     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7556     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7557     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7558     // (*) the struct this entry pertains to is the 2nd element in the list of
7559     //     arguments, hence MEMBER_OF(2)
7560     //
7561     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7562     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7563     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7564     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7565     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7566     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7567     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7568     // (*) the struct this entry pertains to is the 4th element in the list
7569     //     of arguments, hence MEMBER_OF(4)
7570 
7571     // Track if the map information being generated is the first for a capture.
7572     bool IsCaptureFirstInfo = IsFirstComponentList;
7573     // When the variable is on a declare target link or in a to clause with
7574     // unified memory, a reference is needed to hold the host/device address
7575     // of the variable.
7576     bool RequiresReference = false;
7577 
7578     // Scan the components from the base to the complete expression.
7579     auto CI = Components.rbegin();
7580     auto CE = Components.rend();
7581     auto I = CI;
7582 
7583     // Track if the map information being generated is the first for a list of
7584     // components.
7585     bool IsExpressionFirstInfo = true;
7586     bool FirstPointerInComplexData = false;
7587     Address BP = Address::invalid();
7588     const Expr *AssocExpr = I->getAssociatedExpression();
7589     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7590     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7591     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7592 
7593     if (isa<MemberExpr>(AssocExpr)) {
7594       // The base is the 'this' pointer. The content of the pointer is going
7595       // to be the base of the field being mapped.
7596       BP = CGF.LoadCXXThisAddress();
7597     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7598                (OASE &&
7599                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7600       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7601     } else if (OAShE &&
7602                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7603       BP = Address(
7604           CGF.EmitScalarExpr(OAShE->getBase()),
7605           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7606     } else {
7607       // The base is the reference to the variable.
7608       // BP = &Var.
7609       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7610       if (const auto *VD =
7611               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7612         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7613                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7614           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7615               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7616                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7617             RequiresReference = true;
7618             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7619           }
7620         }
7621       }
7622 
7623       // If the variable is a pointer and is being dereferenced (i.e. is not
7624       // the last component), the base has to be the pointer itself, not its
7625       // reference. References are ignored for mapping purposes.
7626       QualType Ty =
7627           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7628       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7629         // No need to generate individual map information for the pointer, it
7630         // can be associated with the combined storage if shared memory mode is
7631         // active or the base declaration is not global variable.
7632         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7633         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7634             !VD || VD->hasLocalStorage())
7635           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7636         else
7637           FirstPointerInComplexData = true;
7638         ++I;
7639       }
7640     }
7641 
7642     // Track whether a component of the list should be marked as MEMBER_OF some
7643     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7644     // in a component list should be marked as MEMBER_OF, all subsequent entries
7645     // do not belong to the base struct. E.g.
7646     // struct S2 s;
7647     // s.ps->ps->ps->f[:]
7648     //   (1) (2) (3) (4)
7649     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7650     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7651     // is the pointee of ps(2) which is not member of struct s, so it should not
7652     // be marked as such (it is still PTR_AND_OBJ).
7653     // The variable is initialized to false so that PTR_AND_OBJ entries which
7654     // are not struct members are not considered (e.g. array of pointers to
7655     // data).
7656     bool ShouldBeMemberOf = false;
7657 
7658     // Variable keeping track of whether or not we have encountered a component
7659     // in the component list which is a member expression. Useful when we have a
7660     // pointer or a final array section, in which case it is the previous
7661     // component in the list which tells us whether we have a member expression.
7662     // E.g. X.f[:]
7663     // While processing the final array section "[:]" it is "f" which tells us
7664     // whether we are dealing with a member of a declared struct.
7665     const MemberExpr *EncounteredME = nullptr;
7666 
7667     // Track for the total number of dimension. Start from one for the dummy
7668     // dimension.
7669     uint64_t DimSize = 1;
7670 
7671     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7672 
7673     for (; I != CE; ++I) {
7674       // If the current component is member of a struct (parent struct) mark it.
7675       if (!EncounteredME) {
7676         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7677         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7678         // as MEMBER_OF the parent struct.
7679         if (EncounteredME) {
7680           ShouldBeMemberOf = true;
7681           // Do not emit as complex pointer if this is actually not array-like
7682           // expression.
7683           if (FirstPointerInComplexData) {
7684             QualType Ty = std::prev(I)
7685                               ->getAssociatedDeclaration()
7686                               ->getType()
7687                               .getNonReferenceType();
7688             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7689             FirstPointerInComplexData = false;
7690           }
7691         }
7692       }
7693 
7694       auto Next = std::next(I);
7695 
7696       // We need to generate the addresses and sizes if this is the last
7697       // component, if the component is a pointer or if it is an array section
7698       // whose length can't be proved to be one. If this is a pointer, it
7699       // becomes the base address for the following components.
7700 
7701       // A final array section, is one whose length can't be proved to be one.
7702       // If the map item is non-contiguous then we don't treat any array section
7703       // as final array section.
7704       bool IsFinalArraySection =
7705           !IsNonContiguous &&
7706           isFinalArraySectionExpression(I->getAssociatedExpression());
7707 
7708       // If we have a declaration for the mapping use that, otherwise use
7709       // the base declaration of the map clause.
7710       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7711                                      ? I->getAssociatedDeclaration()
7712                                      : BaseDecl;
7713 
7714       // Get information on whether the element is a pointer. Have to do a
7715       // special treatment for array sections given that they are built-in
7716       // types.
7717       const auto *OASE =
7718           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7719       const auto *OAShE =
7720           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7721       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7722       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7723       bool IsPointer =
7724           OAShE ||
7725           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7726                        .getCanonicalType()
7727                        ->isAnyPointerType()) ||
7728           I->getAssociatedExpression()->getType()->isAnyPointerType();
7729       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7730 
7731       if (OASE)
7732         ++DimSize;
7733 
7734       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7735         // If this is not the last component, we expect the pointer to be
7736         // associated with an array expression or member expression.
7737         assert((Next == CE ||
7738                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7739                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7740                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7741                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7742                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7743                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7744                "Unexpected expression");
7745 
7746         Address LB = Address::invalid();
7747         if (OAShE) {
7748           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7749                        CGF.getContext().getTypeAlignInChars(
7750                            OAShE->getBase()->getType()));
7751         } else {
7752           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7753                    .getAddress(CGF);
7754         }
7755 
7756         // If this component is a pointer inside the base struct then we don't
7757         // need to create any entry for it - it will be combined with the object
7758         // it is pointing to into a single PTR_AND_OBJ entry.
7759         bool IsMemberPointerOrAddr =
7760             (IsPointer || ForDeviceAddr) && EncounteredME &&
7761             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7762              EncounteredME);
7763         if (!OverlappedElements.empty()) {
7764           // Handle base element with the info for overlapped elements.
7765           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7766           assert(Next == CE &&
7767                  "Expected last element for the overlapped elements.");
7768           assert(!IsPointer &&
7769                  "Unexpected base element with the pointer type.");
7770           // Mark the whole struct as the struct that requires allocation on the
7771           // device.
7772           PartialStruct.LowestElem = {0, LB};
7773           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7774               I->getAssociatedExpression()->getType());
7775           Address HB = CGF.Builder.CreateConstGEP(
7776               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7777                                                               CGF.VoidPtrTy),
7778               TypeSize.getQuantity() - 1);
7779           PartialStruct.HighestElem = {
7780               std::numeric_limits<decltype(
7781                   PartialStruct.HighestElem.first)>::max(),
7782               HB};
7783           PartialStruct.Base = BP;
7784           // Emit data for non-overlapped data.
7785           OpenMPOffloadMappingFlags Flags =
7786               OMP_MAP_MEMBER_OF |
7787               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7788                              /*AddPtrFlag=*/false,
7789                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7790           LB = BP;
7791           llvm::Value *Size = nullptr;
7792           // Do bitcopy of all non-overlapped structure elements.
7793           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7794                    Component : OverlappedElements) {
7795             Address ComponentLB = Address::invalid();
7796             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7797                  Component) {
7798               if (MC.getAssociatedDeclaration()) {
7799                 ComponentLB =
7800                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7801                         .getAddress(CGF);
7802                 Size = CGF.Builder.CreatePtrDiff(
7803                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7804                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7805                 break;
7806               }
7807             }
7808             assert(Size && "Failed to determine structure size");
7809             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7810             CombinedInfo.BasePointers.push_back(BP.getPointer());
7811             CombinedInfo.Pointers.push_back(LB.getPointer());
7812             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7813                 Size, CGF.Int64Ty, /*isSigned=*/true));
7814             CombinedInfo.Types.push_back(Flags);
7815             CombinedInfo.Mappers.push_back(nullptr);
7816             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7817                                                                       : 1);
7818             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7819           }
7820           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7821           CombinedInfo.BasePointers.push_back(BP.getPointer());
7822           CombinedInfo.Pointers.push_back(LB.getPointer());
7823           Size = CGF.Builder.CreatePtrDiff(
7824               CGF.EmitCastToVoidPtr(
7825                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7826               CGF.EmitCastToVoidPtr(LB.getPointer()));
7827           CombinedInfo.Sizes.push_back(
7828               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7829           CombinedInfo.Types.push_back(Flags);
7830           CombinedInfo.Mappers.push_back(nullptr);
7831           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7832                                                                     : 1);
7833           break;
7834         }
7835         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7836         if (!IsMemberPointerOrAddr ||
7837             (Next == CE && MapType != OMPC_MAP_unknown)) {
7838           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7839           CombinedInfo.BasePointers.push_back(BP.getPointer());
7840           CombinedInfo.Pointers.push_back(LB.getPointer());
7841           CombinedInfo.Sizes.push_back(
7842               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7843           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7844                                                                     : 1);
7845 
7846           // If Mapper is valid, the last component inherits the mapper.
7847           bool HasMapper = Mapper && Next == CE;
7848           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7849 
7850           // We need to add a pointer flag for each map that comes from the
7851           // same expression except for the first one. We also need to signal
7852           // this map is the first one that relates with the current capture
7853           // (there is a set of entries for each capture).
7854           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7855               MapType, MapModifiers, MotionModifiers, IsImplicit,
7856               !IsExpressionFirstInfo || RequiresReference ||
7857                   FirstPointerInComplexData,
7858               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7859 
7860           if (!IsExpressionFirstInfo) {
7861             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7862             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7863             if (IsPointer)
7864               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7865                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7866 
7867             if (ShouldBeMemberOf) {
7868               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7869               // should be later updated with the correct value of MEMBER_OF.
7870               Flags |= OMP_MAP_MEMBER_OF;
7871               // From now on, all subsequent PTR_AND_OBJ entries should not be
7872               // marked as MEMBER_OF.
7873               ShouldBeMemberOf = false;
7874             }
7875           }
7876 
7877           CombinedInfo.Types.push_back(Flags);
7878         }
7879 
7880         // If we have encountered a member expression so far, keep track of the
7881         // mapped member. If the parent is "*this", then the value declaration
7882         // is nullptr.
7883         if (EncounteredME) {
7884           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7885           unsigned FieldIndex = FD->getFieldIndex();
7886 
7887           // Update info about the lowest and highest elements for this struct
7888           if (!PartialStruct.Base.isValid()) {
7889             PartialStruct.LowestElem = {FieldIndex, LB};
7890             if (IsFinalArraySection) {
7891               Address HB =
7892                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7893                       .getAddress(CGF);
7894               PartialStruct.HighestElem = {FieldIndex, HB};
7895             } else {
7896               PartialStruct.HighestElem = {FieldIndex, LB};
7897             }
7898             PartialStruct.Base = BP;
7899           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7900             PartialStruct.LowestElem = {FieldIndex, LB};
7901           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7902             PartialStruct.HighestElem = {FieldIndex, LB};
7903           }
7904         }
7905 
7906         // Need to emit combined struct for array sections.
7907         if (IsFinalArraySection || IsNonContiguous)
7908           PartialStruct.IsArraySection = true;
7909 
7910         // If we have a final array section, we are done with this expression.
7911         if (IsFinalArraySection)
7912           break;
7913 
7914         // The pointer becomes the base for the next element.
7915         if (Next != CE)
7916           BP = LB;
7917 
7918         IsExpressionFirstInfo = false;
7919         IsCaptureFirstInfo = false;
7920         FirstPointerInComplexData = false;
7921       } else if (FirstPointerInComplexData) {
7922         QualType Ty = Components.rbegin()
7923                           ->getAssociatedDeclaration()
7924                           ->getType()
7925                           .getNonReferenceType();
7926         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7927         FirstPointerInComplexData = false;
7928       }
7929     }
7930 
7931     if (!IsNonContiguous)
7932       return;
7933 
7934     const ASTContext &Context = CGF.getContext();
7935 
7936     // For supporting stride in array section, we need to initialize the first
7937     // dimension size as 1, first offset as 0, and first count as 1
7938     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7939     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7940     MapValuesArrayTy CurStrides;
7941     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7942     uint64_t ElementTypeSize;
7943 
7944     // Collect Size information for each dimension and get the element size as
7945     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7946     // should be [10, 10] and the first stride is 4 btyes.
7947     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7948          Components) {
7949       const Expr *AssocExpr = Component.getAssociatedExpression();
7950       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7951 
7952       if (!OASE)
7953         continue;
7954 
7955       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7956       auto *CAT = Context.getAsConstantArrayType(Ty);
7957       auto *VAT = Context.getAsVariableArrayType(Ty);
7958 
7959       // We need all the dimension size except for the last dimension.
7960       assert((VAT || CAT || &Component == &*Components.begin()) &&
7961              "Should be either ConstantArray or VariableArray if not the "
7962              "first Component");
7963 
7964       // Get element size if CurStrides is empty.
7965       if (CurStrides.empty()) {
7966         const Type *ElementType = nullptr;
7967         if (CAT)
7968           ElementType = CAT->getElementType().getTypePtr();
7969         else if (VAT)
7970           ElementType = VAT->getElementType().getTypePtr();
7971         else
7972           assert(&Component == &*Components.begin() &&
7973                  "Only expect pointer (non CAT or VAT) when this is the "
7974                  "first Component");
7975         // If ElementType is null, then it means the base is a pointer
7976         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7977         // for next iteration.
7978         if (ElementType) {
7979           // For the case that having pointer as base, we need to remove one
7980           // level of indirection.
7981           if (&Component != &*Components.begin())
7982             ElementType = ElementType->getPointeeOrArrayElementType();
7983           ElementTypeSize =
7984               Context.getTypeSizeInChars(ElementType).getQuantity();
7985           CurStrides.push_back(
7986               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7987         }
7988       }
7989       // Get dimension value except for the last dimension since we don't need
7990       // it.
7991       if (DimSizes.size() < Components.size() - 1) {
7992         if (CAT)
7993           DimSizes.push_back(llvm::ConstantInt::get(
7994               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7995         else if (VAT)
7996           DimSizes.push_back(CGF.Builder.CreateIntCast(
7997               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7998               /*IsSigned=*/false));
7999       }
8000     }
8001 
    // Skip the dummy dimension since we already have its information.
8003     auto DI = DimSizes.begin() + 1;
8004     // Product of dimension.
8005     llvm::Value *DimProd =
8006         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8007 
8008     // Collect info for non-contiguous. Notice that offset, count, and stride
8009     // are only meaningful for array-section, so we insert a null for anything
8010     // other than array-section.
8011     // Also, the size of offset, count, and stride are not the same as
8012     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8013     // count, and stride are the same as the number of non-contiguous
8014     // declaration in target update to/from clause.
8015     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8016          Components) {
8017       const Expr *AssocExpr = Component.getAssociatedExpression();
8018 
8019       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8020         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8021             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8022             /*isSigned=*/false);
8023         CurOffsets.push_back(Offset);
8024         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8025         CurStrides.push_back(CurStrides.back());
8026         continue;
8027       }
8028 
8029       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8030 
8031       if (!OASE)
8032         continue;
8033 
8034       // Offset
8035       const Expr *OffsetExpr = OASE->getLowerBound();
8036       llvm::Value *Offset = nullptr;
8037       if (!OffsetExpr) {
8038         // If offset is absent, then we just set it to zero.
8039         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8040       } else {
8041         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8042                                            CGF.Int64Ty,
8043                                            /*isSigned=*/false);
8044       }
8045       CurOffsets.push_back(Offset);
8046 
8047       // Count
8048       const Expr *CountExpr = OASE->getLength();
8049       llvm::Value *Count = nullptr;
8050       if (!CountExpr) {
8051         // In Clang, once a high dimension is an array section, we construct all
8052         // the lower dimension as array section, however, for case like
8053         // arr[0:2][2], Clang construct the inner dimension as an array section
8054         // but it actually is not in an array section form according to spec.
8055         if (!OASE->getColonLocFirst().isValid() &&
8056             !OASE->getColonLocSecond().isValid()) {
8057           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8058         } else {
8059           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8060           // When the length is absent it defaults to ⌈(size −
8061           // lower-bound)/stride⌉, where size is the size of the array
8062           // dimension.
8063           const Expr *StrideExpr = OASE->getStride();
8064           llvm::Value *Stride =
8065               StrideExpr
8066                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8067                                               CGF.Int64Ty, /*isSigned=*/false)
8068                   : nullptr;
8069           if (Stride)
8070             Count = CGF.Builder.CreateUDiv(
8071                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8072           else
8073             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8074         }
8075       } else {
8076         Count = CGF.EmitScalarExpr(CountExpr);
8077       }
8078       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8079       CurCounts.push_back(Count);
8080 
8081       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8082       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8083       //              Offset      Count     Stride
8084       //    D0          0           1         4    (int)    <- dummy dimension
8085       //    D1          0           2         8    (2 * (1) * 4)
8086       //    D2          1           2         20   (1 * (1 * 5) * 4)
8087       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8088       const Expr *StrideExpr = OASE->getStride();
8089       llvm::Value *Stride =
8090           StrideExpr
8091               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8092                                           CGF.Int64Ty, /*isSigned=*/false)
8093               : nullptr;
8094       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8095       if (Stride)
8096         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8097       else
8098         CurStrides.push_back(DimProd);
8099       if (DI != DimSizes.end())
8100         ++DI;
8101     }
8102 
8103     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8104     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8105     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8106   }
8107 
8108   /// Return the adjusted map modifiers if the declaration a capture refers to
8109   /// appears in a first-private clause. This is expected to be used only with
8110   /// directives that start with 'target'.
8111   MappableExprsHandler::OpenMPOffloadMappingFlags
8112   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8113     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8114 
8115     // A first private variable captured by reference will use only the
8116     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8117     // declaration is known as first-private in this handler.
8118     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8119       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8120           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8121         return MappableExprsHandler::OMP_MAP_ALWAYS |
8122                MappableExprsHandler::OMP_MAP_TO;
8123       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8124         return MappableExprsHandler::OMP_MAP_TO |
8125                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8126       return MappableExprsHandler::OMP_MAP_PRIVATE |
8127              MappableExprsHandler::OMP_MAP_TO;
8128     }
8129     return MappableExprsHandler::OMP_MAP_TO |
8130            MappableExprsHandler::OMP_MAP_FROM;
8131   }
8132 
8133   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8134     // Rotate by getFlagMemberOffset() bits.
8135     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8136                                                   << getFlagMemberOffset());
8137   }
8138 
8139   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8140                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8141     // If the entry is PTR_AND_OBJ but has not been marked with the special
8142     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8143     // marked as MEMBER_OF.
8144     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8145         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8146       return;
8147 
8148     // Reset the placeholder value to prepare the flag for the assignment of the
8149     // proper MEMBER_OF value.
8150     Flags &= ~OMP_MAP_MEMBER_OF;
8151     Flags |= MemberOfFlag;
8152   }
8153 
  /// Append to \p Layout the non-bitfield, non-zero-size fields of \p RD —
  /// including those of its (non-empty) base classes, recursively — in the
  /// order of the record's LLVM struct layout.
  ///
  /// \param RD the record whose layout is being flattened (must not be a
  /// union).
  /// \param Layout [out] receives the fields in layout order.
  /// \param AsBase if true, use the base-subobject LLVM type of \p RD (the
  /// type used when \p RD is laid out as a base class) instead of its
  /// complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per element of the LLVM struct; each slot ends up holding
    // either a base class or a field (or stays null).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // The slot may already be occupied; keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected entries in layout order, recursing into bases.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8213 
8214 public:
8215   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8216       : CurDir(&Dir), CGF(CGF) {
8217     // Extract firstprivate clause information.
8218     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8219       for (const auto *D : C->varlists())
8220         FirstPrivateDecls.try_emplace(
8221             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8222     // Extract implicit firstprivates from uses_allocators clauses.
8223     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8224       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8225         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8226         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8227           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8228                                         /*Implicit=*/true);
8229         else if (const auto *VD = dyn_cast<VarDecl>(
8230                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8231                          ->getDecl()))
8232           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8233       }
8234     }
8235     // Extract device pointer clause information.
8236     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8237       for (auto L : C->component_lists())
8238         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8239   }
8240 
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor, no clause information is extracted
  /// eagerly here; only the directive itself is recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8244 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo [out] receives the extra combined entry (one new
  /// element is pushed onto each of its parallel arrays).
  /// \param CurTypes map flags of the entries generated so far for this
  /// struct; updated in place with the proper MEMBER_OF value.
  /// \param PartialStruct base address and lowest/highest mapped elements of
  /// the struct.
  /// \param VD the mapped declaration, if any (recorded in Exprs).
  /// \param NotTargetParams if true, the combined entry is emitted without
  /// the TARGET_PARAM flag.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that carries no MEMBER_OF placeholder and is not an
    // array section needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element if any.
    if (!CurTypes.empty())
      CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8296 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param SkipVarSet declarations whose component lists must be skipped
  /// entirely when gathering map information.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. Component lists
    // are grouped under the canonical declaration, or under nullptr for
    // member ('this') mappings.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Fill the information map from map, to, and from clauses.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once, even if listed repeatedly.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the gathered map information, one declaration at a time.
    for (const auto &M : Info) {
      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit,
            L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8598 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  ///
  /// The map clauses are taken from the enclosing 'declare mapper' directive
  /// (CurDir) rather than from an executable directive.
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Fill the information map for map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      // EI iterates the clause's variable-reference expressions in lock-step
      // with its component lists below.
      const auto *EI = MC->getVarRefs().begin();
      for (const auto L : MC->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
        // Mapped declaration; may be null when the component list carries no
        // associated declaration.
        const ValueDecl *VD =
            std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                           : nullptr;
        // Get the corresponding user-defined mapper.
        Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                              MC->getMapTypeModifiers(), llvm::None,
                              /*ReturnDevicePointer=*/false, MC->isImplicit(),
                              std::get<2>(L), E);
        ++EI;
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
            L.ForDeviceAddr, VD, L.VarRef);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        // Presumably keeps NonContigInfo.Dims the same length as the emitted
        // entries after the extra combined entry is added — TODO confirm the
        // intended dimension value (0) against the non-contiguous consumers.
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }
8663 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is (a reference to) a lambda object, append one map entry for
  /// the captured 'this' (if any) and one entry per variable captured by
  /// reference (or captured pointer). Every entry is tagged
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT; the MEMBER_OF placeholder is
  /// fixed up later by adjustMemberOfForLambdaCaptures using the
  /// field-address -> lambda-address pairs recorded in \p LambdaPointers.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closure types need this handling.
    if (!RD || !RD->isLambda())
      return;
    // Arg is the captured value: treat it as the address of the lambda object.
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this': base is the capture field inside the lambda,
      // the pointee is the stored pointer value.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember field address -> lambda address for the later MEMBER_OF fixup.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: intentionally shadows the outer VD; this is the captured var.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need map entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer (by value): map the loaded pointer, zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8730 
8731   /// Set correct indices for lambdas captures.
8732   void adjustMemberOfForLambdaCaptures(
8733       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8734       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8735       MapFlagsArrayTy &Types) const {
8736     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8737       // Set correct member_of idx for all implicit lambda captures.
8738       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8739                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8740         continue;
8741       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8742       assert(BasePtr && "Unable to find base lambda address.");
8743       int TgtIdx = -1;
8744       for (unsigned J = I; J > 0; --J) {
8745         unsigned Idx = J - 1;
8746         if (Pointers[Idx] != BasePtr)
8747           continue;
8748         TgtIdx = Idx;
8749         break;
8750       }
8751       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8752       // All other current entries will be MEMBER_OF the combined entry
8753       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8754       // 0xFFFF in the MEMBER_OF field).
8755       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8756       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8757     }
8758   }
8759 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// Gathers every map-clause component list that refers to the captured
  /// declaration, detects component lists that overlap (map the same object
  /// or sub-objects of it), and emits entries for overlapped lists first so
  /// the overlap information can be attached to them.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Declaration the capture refers to; null when capturing 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Only the pointer itself is passed, so the size is that of 'void *'.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per map-clause component list that refers to VD:
    // (components, map type, map modifiers, is-implicit, mapper, var-ref).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI iterates the clause's variable references in lock-step with its
      // component lists.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Key: the component list forming the base of an overlap; value: the
    // component lists that map sub-objects of that base.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Only compare L against later lists; earlier pairs were handled in
      // previous outer iterations.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE: MapType/MapModifiers/IsImplicit/Mapper/VarRef are reused as
        // scratch for L1's fields here; none of them is read again before
        // being re-assigned, so the clobbering is harmless.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the base (reverse order) while they refer to
        // the same kind of expression and the same declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The fully-consumed (shorter) list is the base; the other list
          // maps a sub-object of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Collect the record's fields in layout order so overlapped sections
      // can be ordered by field position.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Same reverse walk as the overlap detection above.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Sibling fields: order by position within their record.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Different parents: whichever field appears first in the plain
            // layout is considered smaller.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
8955 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when no explicit clause dictates how the capture is passed to the
  /// target region. Exactly one entry is appended to \a CombinedInfo and it
  /// is always marked as a target parameter.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured 'this': map the pointed-to object, tofrom by default.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // If VD was recorded as firstprivate, take its recorded implicitness.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: register a global copy once, memcpy the
        // current value into it, and map from the global instead of CV.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: pass the loaded
          // pointer value rather than the address of the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9043 };
9044 } // anonymous namespace
9045 
/// For every mapped entry with more than one dimension, emit a stack array of
/// 'descriptor_dim {offset, count, stride}' records and store its address
/// into the corresponding slot of \p Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices inside descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are written in reverse of their stored order; presumably
      // Offsets/Counts/Strides are recorded innermost-first — verify against
      // the producer of NonContigInfo.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // Advance the descriptor index only for dimensions actually emitted.
    ++L;
  }
}
9113 
9114 /// Emit a string constant containing the names of the values mapped to the
9115 /// offloading runtime library.
9116 llvm::Constant *
9117 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9118                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9119   llvm::Constant *SrcLocStr;
9120   if (!MapExprs.getMapDecl()) {
9121     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9122   } else {
9123     std::string ExprName = "";
9124     if (MapExprs.getMapExpr()) {
9125       PrintingPolicy P(CGF.getContext().getLangOpts());
9126       llvm::raw_string_ostream OS(ExprName);
9127       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9128       OS.flush();
9129     } else {
9130       ExprName = MapExprs.getMapDecl()->getNameAsString();
9131     }
9132 
9133     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9134     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9135     const char *FileName = PLoc.getFilename();
9136     unsigned Line = PLoc.getLine();
9137     unsigned Column = PLoc.getColumn();
9138     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9139                                                 Line, Column);
9140   }
9141 
9142   return SrcLocStr;
9143 }
9144 
9145 /// Emit the arrays used to pass the captures and map information to the
9146 /// offloading runtime library. If there is no map or capture information,
9147 /// return nullptr by reference.
9148 static void emitOffloadingArrays(
9149     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9150     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9151     bool IsNonContiguous = false) {
9152   CodeGenModule &CGM = CGF.CGM;
9153   ASTContext &Ctx = CGF.getContext();
9154 
9155   // Reset the array information.
9156   Info.clearArrayInfo();
9157   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9158 
9159   if (Info.NumberOfPtrs) {
9160     // Detect if we have any capture size requiring runtime evaluation of the
9161     // size so that a constant array could be eventually used.
9162     bool hasRuntimeEvaluationCaptureSize = false;
9163     for (llvm::Value *S : CombinedInfo.Sizes)
9164       if (!isa<llvm::Constant>(S)) {
9165         hasRuntimeEvaluationCaptureSize = true;
9166         break;
9167       }
9168 
9169     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9170     QualType PointerArrayType = Ctx.getConstantArrayType(
9171         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9172         /*IndexTypeQuals=*/0);
9173 
9174     Info.BasePointersArray =
9175         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9176     Info.PointersArray =
9177         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9178     Address MappersArray =
9179         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9180     Info.MappersArray = MappersArray.getPointer();
9181 
9182     // If we don't have any VLA types or other types that require runtime
9183     // evaluation, we can use a constant array for the map sizes, otherwise we
9184     // need to fill up the arrays as we do for the pointers.
9185     QualType Int64Ty =
9186         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9187     if (hasRuntimeEvaluationCaptureSize) {
9188       QualType SizeArrayType = Ctx.getConstantArrayType(
9189           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9190           /*IndexTypeQuals=*/0);
9191       Info.SizesArray =
9192           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9193     } else {
9194       // We expect all the sizes to be constant, so we collect them to create
9195       // a constant array.
9196       SmallVector<llvm::Constant *, 16> ConstSizes;
9197       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9198         if (IsNonContiguous &&
9199             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9200           ConstSizes.push_back(llvm::ConstantInt::get(
9201               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9202         } else {
9203           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9204         }
9205       }
9206 
9207       auto *SizesArrayInit = llvm::ConstantArray::get(
9208           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9209       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9210       auto *SizesArrayGbl = new llvm::GlobalVariable(
9211           CGM.getModule(), SizesArrayInit->getType(),
9212           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9213           SizesArrayInit, Name);
9214       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9215       Info.SizesArray = SizesArrayGbl;
9216     }
9217 
9218     // The map types are always constant so we don't need to generate code to
9219     // fill arrays. Instead, we create an array constant.
9220     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9221     llvm::copy(CombinedInfo.Types, Mapping.begin());
9222     llvm::Constant *MapTypesArrayInit =
9223         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9224     std::string MaptypesName =
9225         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9226     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9227         CGM.getModule(), MapTypesArrayInit->getType(),
9228         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9229         MapTypesArrayInit, MaptypesName);
9230     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9231     Info.MapTypesArray = MapTypesArrayGbl;
9232 
9233     // The information types are only built if there is debug information
9234     // requested.
9235     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9236       Info.MapNamesArray = llvm::Constant::getNullValue(
9237           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9238     } else {
9239       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9240         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9241       };
9242       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9243       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9244 
9245       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9246           llvm::ArrayType::get(
9247               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9248               CombinedInfo.Exprs.size()),
9249           InfoMap);
9250       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9251           CGM.getModule(), MapNamesArrayInit->getType(),
9252           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9253           MapNamesArrayInit,
9254           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9255       Info.MapNamesArray = MapNamesArrayGbl;
9256     }
9257 
9258     // If there's a present map type modifier, it must not be applied to the end
9259     // of a region, so generate a separate map type array in that case.
9260     if (Info.separateBeginEndCalls()) {
9261       bool EndMapTypesDiffer = false;
9262       for (uint64_t &Type : Mapping) {
9263         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9264           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9265           EndMapTypesDiffer = true;
9266         }
9267       }
9268       if (EndMapTypesDiffer) {
9269         MapTypesArrayInit =
9270             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9271         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9272         MapTypesArrayGbl = new llvm::GlobalVariable(
9273             CGM.getModule(), MapTypesArrayInit->getType(),
9274             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9275             MapTypesArrayInit, MaptypesName);
9276         MapTypesArrayGbl->setUnnamedAddr(
9277             llvm::GlobalValue::UnnamedAddr::Global);
9278         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9279       }
9280     }
9281 
9282     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9283       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9284       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9285           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9286           Info.BasePointersArray, 0, I);
9287       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9288           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9289       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9290       CGF.Builder.CreateStore(BPVal, BPAddr);
9291 
9292       if (Info.requiresDevicePointerInfo())
9293         if (const ValueDecl *DevVD =
9294                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9295           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9296 
9297       llvm::Value *PVal = CombinedInfo.Pointers[I];
9298       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9299           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9300           Info.PointersArray, 0, I);
9301       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9302           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9303       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9304       CGF.Builder.CreateStore(PVal, PAddr);
9305 
9306       if (hasRuntimeEvaluationCaptureSize) {
9307         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9308             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9309             Info.SizesArray,
9310             /*Idx0=*/0,
9311             /*Idx1=*/I);
9312         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9313         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9314                                                           CGM.Int64Ty,
9315                                                           /*isSigned=*/true),
9316                                 SAddr);
9317       }
9318 
9319       // Fill up the mapper array.
9320       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9321       if (CombinedInfo.Mappers[I]) {
9322         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9323             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9324         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9325         Info.HasMapper = true;
9326       }
9327       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9328       CGF.Builder.CreateStore(MFunc, MAddr);
9329     }
9330   }
9331 
9332   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9333       Info.NumberOfPtrs == 0)
9334     return;
9335 
9336   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9337 }
9338 
namespace {
/// Options controlling how emitOffloadingArraysArgument emits its arguments.
struct ArgumentsOptions {
  /// When true, emit the map-type array meant for the end of the region
  /// (relevant when begin/end calls use different map types).
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9347 
9348 /// Emit the arguments to be passed to the runtime library based on the
9349 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9350 /// ForEndCall, emit map types to be passed for the end of the region instead of
9351 /// the beginning.
9352 static void emitOffloadingArraysArgument(
9353     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9354     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9355     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9356     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9357     const ArgumentsOptions &Options = ArgumentsOptions()) {
9358   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9359          "expected region end call to runtime only when end call is separate");
9360   CodeGenModule &CGM = CGF.CGM;
9361   if (Info.NumberOfPtrs) {
9362     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9363         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9364         Info.BasePointersArray,
9365         /*Idx0=*/0, /*Idx1=*/0);
9366     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9367         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9368         Info.PointersArray,
9369         /*Idx0=*/0,
9370         /*Idx1=*/0);
9371     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9372         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9373         /*Idx0=*/0, /*Idx1=*/0);
9374     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9375         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9376         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9377                                                     : Info.MapTypesArray,
9378         /*Idx0=*/0,
9379         /*Idx1=*/0);
9380 
9381     // Only emit the mapper information arrays if debug information is
9382     // requested.
9383     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9384       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9385     else
9386       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9387           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9388           Info.MapNamesArray,
9389           /*Idx0=*/0,
9390           /*Idx1=*/0);
9391     // If there is no user-defined mapper, set the mapper array to nullptr to
9392     // avoid an unnecessary data privatization
9393     if (!Info.HasMapper)
9394       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9395     else
9396       MappersArrayArg =
9397           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9398   } else {
9399     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9400     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9401     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9402     MapTypesArrayArg =
9403         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9404     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9405     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9406   }
9407 }
9408 
/// Check for inner distribute directive.
///
/// Inspects the single child statement captured by \p D for a nested
/// distribute directive.  For '#pragma omp target' the search additionally
/// descends through an intervening 'teams' directive.  Returns the nested
/// distribute directive, or nullptr if none is found.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  // Strip implicit compound/captured wrappers to reach the real child.
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may directly enclose a distribute directive, or enclose a
      // 'teams' directive that in turn encloses one.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper, inside the teams region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Target forms that can never enclose a distribute directive.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Combined target+distribute forms already contain distribute themselves,
    // and all other directives are not valid values of D here; reaching any
    // of these cases is a caller bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9515 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Nothing to do if a mapper function has already been emitted for D.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // 'restrict' is safe: the mapper's pointer argument cannot alias.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below to refer to each array element in turn.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // The second incoming value (the advanced pointer) is added after the loop
  // body is emitted, once PtrNext is available.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decay outcomes (tofrom arrives via ToElseBB unchanged).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted mapper so subsequent calls reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9813 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  // NOTE(review): the guard emitted below is Size >= 1, while the documented
  // intent in emitUserDefinedMapper's \code example is "size > 1" — confirm
  // against upstream before changing either.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization runs only when the map type does NOT request deletion.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only when the map type DOES request deletion.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapNameArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9872 
9873 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9874     const OMPDeclareMapperDecl *D) {
9875   auto I = UDMMap.find(D);
9876   if (I != UDMMap.end())
9877     return I->second;
9878   emitUserDefinedMapper(D);
9879   return UDMMap.lookup(D);
9880 }
9881 
9882 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9883     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9884     llvm::Value *DeviceID,
9885     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9886                                      const OMPLoopDirective &D)>
9887         SizeEmitter) {
9888   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9889   const OMPExecutableDirective *TD = &D;
9890   // Get nested teams distribute kind directive, if any.
9891   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9892     TD = getNestedDistributeDirective(CGM.getContext(), D);
9893   if (!TD)
9894     return;
9895   const auto *LD = cast<OMPLoopDirective>(TD);
9896   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9897                                                          PrePostActionTy &) {
9898     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9899       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9900       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9901       CGF.EmitRuntimeCall(
9902           OMPBuilder.getOrCreateRuntimeFunction(
9903               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
9904           Args);
9905     }
9906   };
9907   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9908 }
9909 
// Emit code to launch the target region described by \p D: capture the
// kernel arguments, build the offloading argument arrays, call the
// __tgt_target* runtime entry point, and fall back to the host version
// (\p OutlinedFn) when offloading fails or is unavailable.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the target region to be wrapped in an
  // outer task (see EmitOMPTargetTaskBasedDirective uses below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values captured by the target region; these become the
  // arguments of the outlined function and the basis for the map arrays.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray/MapNamesArray are filled in by TargetThenGen
  // below before ThenGen runs; ThenGen captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      // NOTE: captured vars are regenerated here because the outer-task path
      // emits this code in a different function context.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // Same argument list as above, minus the teams/threads counts.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the map information for every capture, fill the offloading
  // argument arrays, then run ThenGen (directly or inside an outer task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured record fields, and the captured values
    // in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                    nullptr, /*NoTargetParam=*/false);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the arrays to the values ThenGen captured by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10216 
// Recursively walk \p S looking for OpenMP target execution directives and
// emit a device function for each one found, using \p ParentName in the
// generated kernel names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Identify the target region by its (device, file, line) location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (RequiresDeviceCodegen filtered them out above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For non-target directives, recurse into the raw associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10364 
10365 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10366   // If emitting code for the host, we do not process FD here. Instead we do
10367   // the normal code generation.
10368   if (!CGM.getLangOpts().OpenMPIsDevice) {
10369     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10370       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10371           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10372       // Do not emit device_type(nohost) functions for the host.
10373       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10374         return true;
10375     }
10376     return false;
10377   }
10378 
10379   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10380   // Try to detect target regions in the function.
10381   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10382     StringRef Name = CGM.getMangledName(GD);
10383     scanForTargetRegionsFunctions(FD->getBody(), Name);
10384     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10385         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10386     // Do not emit device_type(nohost) functions for the host.
10387     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10388       return true;
10389   }
10390 
10391   // Do not to emit function if it is not marked as declare target.
10392   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10393          AlreadyEmittedTargetDecls.count(VD) == 0;
10394 }
10395 
10396 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10397   if (!CGM.getLangOpts().OpenMPIsDevice)
10398     return false;
10399 
10400   // Check if there are Ctors/Dtors in this declaration and look for target
10401   // regions in it. We use the complete variant to produce the kernel name
10402   // mangling.
10403   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10404   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10405     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10406       StringRef ParentName =
10407           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10408       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10409     }
10410     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10411       StringRef ParentName =
10412           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10413       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10414     }
10415   }
10416 
10417   // Do not to emit variable if it is not marked as declare target.
10418   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10419       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10420           cast<VarDecl>(GD.getDecl()));
10421   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10422       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10423        HasRequiresUnifiedSharedMemory)) {
10424     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10425     return true;
10426   }
10427   return false;
10428 }
10429 
10430 llvm::Constant *
10431 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10432                                                 const VarDecl *VD) {
10433   assert(VD->getType().isConstant(CGM.getContext()) &&
10434          "Expected constant variable.");
10435   StringRef VarName;
10436   llvm::Constant *Addr;
10437   llvm::GlobalValue::LinkageTypes Linkage;
10438   QualType Ty = VD->getType();
10439   SmallString<128> Buffer;
10440   {
10441     unsigned DeviceID;
10442     unsigned FileID;
10443     unsigned Line;
10444     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10445                              FileID, Line);
10446     llvm::raw_svector_ostream OS(Buffer);
10447     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10448        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10449     VarName = OS.str();
10450   }
10451   Linkage = llvm::GlobalValue::InternalLinkage;
10452   Addr =
10453       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10454                                   getDefaultFirstprivateAddressSpace());
10455   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10456   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10457   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10458   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10459       VarName, Addr, VarSize,
10460       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10461   return Addr;
10462 }
10463 
10464 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10465                                                    llvm::Constant *Addr) {
10466   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10467       !CGM.getLangOpts().OpenMPIsDevice)
10468     return;
10469   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10470       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10471   if (!Res) {
10472     if (CGM.getLangOpts().OpenMPIsDevice) {
10473       // Register non-target variables being emitted in device code (debug info
10474       // may cause this).
10475       StringRef VarName = CGM.getMangledName(VD);
10476       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10477     }
10478     return;
10479   }
10480   // Register declare target variables.
10481   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10482   StringRef VarName;
10483   CharUnits VarSize;
10484   llvm::GlobalValue::LinkageTypes Linkage;
10485 
10486   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10487       !HasRequiresUnifiedSharedMemory) {
10488     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10489     VarName = CGM.getMangledName(VD);
10490     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10491       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10492       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10493     } else {
10494       VarSize = CharUnits::Zero();
10495     }
10496     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10497     // Temp solution to prevent optimizations of the internal variables.
10498     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10499       std::string RefName = getName({VarName, "ref"});
10500       if (!CGM.GetGlobalValue(RefName)) {
10501         llvm::Constant *AddrRef =
10502             getOrCreateInternalVariable(Addr->getType(), RefName);
10503         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10504         GVAddrRef->setConstant(/*Val=*/true);
10505         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10506         GVAddrRef->setInitializer(Addr);
10507         CGM.addCompilerUsedGlobal(GVAddrRef);
10508       }
10509     }
10510   } else {
10511     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10512             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10513              HasRequiresUnifiedSharedMemory)) &&
10514            "Declare target attribute must link or to with unified memory.");
10515     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10516       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10517     else
10518       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10519 
10520     if (CGM.getLangOpts().OpenMPIsDevice) {
10521       VarName = Addr->getName();
10522       Addr = nullptr;
10523     } else {
10524       VarName = getAddrOfDeclareTargetVar(VD).getName();
10525       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10526     }
10527     VarSize = CGM.getPointerSize();
10528     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10529   }
10530 
10531   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10532       VarName, Addr, VarSize, Flags, Linkage);
10533 }
10534 
10535 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10536   if (isa<FunctionDecl>(GD.getDecl()) ||
10537       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10538     return emitTargetFunctions(GD);
10539 
10540   return emitTargetGlobalVariable(GD);
10541 }
10542 
10543 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10544   for (const VarDecl *VD : DeferredGlobalVariables) {
10545     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10546         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10547     if (!Res)
10548       continue;
10549     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10550         !HasRequiresUnifiedSharedMemory) {
10551       CGM.EmitGlobal(VD);
10552     } else {
10553       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10554               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10555                HasRequiresUnifiedSharedMemory)) &&
10556              "Expected link clause or to clause with unified memory.");
10557       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10558     }
10559   }
10560 }
10561 
// Host-side default: no lambda-capture adjustment is needed; device-specific
// runtimes override this hook. Only the directive-kind invariant is checked.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10567 
10568 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10569   for (const OMPClause *Clause : D->clauselists()) {
10570     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10571       HasRequiresUnifiedSharedMemory = true;
10572     } else if (const auto *AC =
10573                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10574       switch (AC->getAtomicDefaultMemOrderKind()) {
10575       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10576         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10577         break;
10578       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10579         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10580         break;
10581       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10582         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10583         break;
10584       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10585         break;
10586       }
10587     }
10588   }
10589 }
10590 
/// Returns the atomic ordering selected by a prior
/// 'requires atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10594 
10595 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10596                                                        LangAS &AS) {
10597   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10598     return false;
10599   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10600   switch(A->getAllocatorType()) {
10601   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10602   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10603   // Not supported, fallback to the default mem space.
10604   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10605   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10606   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10607   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10608   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10609   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10610   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10611     AS = LangAS::Default;
10612     return true;
10613   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10614     llvm_unreachable("Expected predefined allocator for the variables with the "
10615                      "static storage.");
10616   }
10617   return false;
10618 }
10619 
/// Returns true if a 'requires unified_shared_memory' clause has been seen
/// (set by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10623 
10624 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10625     CodeGenModule &CGM)
10626     : CGM(CGM) {
10627   if (CGM.getLangOpts().OpenMPIsDevice) {
10628     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10629     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10630   }
10631 }
10632 
/// RAII exit: restore the marking mode saved by the constructor (device only,
/// matching the condition guarded in the constructor).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10637 
10638 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10639   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10640     return true;
10641 
10642   const auto *D = cast<FunctionDecl>(GD.getDecl());
10643   // Do not to emit function if it is marked as declare target as it was already
10644   // emitted.
10645   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10646     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10647       if (auto *F = dyn_cast_or_null<llvm::Function>(
10648               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10649         return !F->isDeclaration();
10650       return false;
10651     }
10652     return true;
10653   }
10654 
10655   return !AlreadyEmittedTargetDecls.insert(D).second;
10656 }
10657 
/// Creates the host-side constructor-like function that registers the
/// 'requires' flags with the offload runtime via __tgt_register_requires.
/// Returns nullptr when no registration is needed (no offload targets,
/// simd-only mode, device compilation, or no target regions/entries at all).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so it is torn down before returning.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10699 
10700 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10701                                     const OMPExecutableDirective &D,
10702                                     SourceLocation Loc,
10703                                     llvm::Function *OutlinedFn,
10704                                     ArrayRef<llvm::Value *> CapturedVars) {
10705   if (!CGF.HaveInsertPoint())
10706     return;
10707 
10708   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10709   CodeGenFunction::RunCleanupsScope Scope(CGF);
10710 
10711   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10712   llvm::Value *Args[] = {
10713       RTLoc,
10714       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10715       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10716   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10717   RealArgs.append(std::begin(Args), std::end(Args));
10718   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10719 
10720   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10721       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10722   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10723 }
10724 
10725 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10726                                          const Expr *NumTeams,
10727                                          const Expr *ThreadLimit,
10728                                          SourceLocation Loc) {
10729   if (!CGF.HaveInsertPoint())
10730     return;
10731 
10732   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10733 
10734   llvm::Value *NumTeamsVal =
10735       NumTeams
10736           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10737                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10738           : CGF.Builder.getInt32(0);
10739 
10740   llvm::Value *ThreadLimitVal =
10741       ThreadLimit
10742           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10743                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10744           : CGF.Builder.getInt32(0);
10745 
10746   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10747   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10748                                      ThreadLimitVal};
10749   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10750                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10751                       PushNumTeamsArgs);
10752 }
10753 
/// Emits the 'target data' region: a __tgt_target_data_begin_mapper /
/// __tgt_target_data_end_mapper pair bracketing the region body, honoring the
/// optional 'if' and 'device' clauses. When device-pointer privatization is
/// required the body is emitted twice (with and without privatization);
/// otherwise it is emitted once between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // ForEndCall=true: the end call may use different map-type flags.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10907 
/// Emits a standalone target data directive (target enter data, target exit
/// data, or target update) as a single call to the matching
/// __tgt_target_data_{begin,end,update}[_nowait]_mapper runtime entry point,
/// honoring the optional 'if', 'device', 'nowait', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays are filled by TargetThenGen below and read
  // by ThenGen, hence captured by reference in both lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are ruled out by the assertion at the
    // top of this function; list them explicitly so that adding a new
    // OpenMP directive forces this switch to be revisited.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' or 'nowait' require wrapping the call in an outer task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, the runtime call is skipped entirely when the
  // condition is false (empty else branch).
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11084 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Mangling category of the parameter; every parameter defaults to Vector.
    ParamKindTy Kind = Vector;
    // For Linear/LinearWithVarStride kinds this looks like a stride or clause
    // argument value — confirm at the fill site (not visible in this chunk).
    llvm::APSInt StrideOrArg;
    // Alignment used for the 'a<N>' mangling suffix; zero means "none"
    // (emitX86DeclareSimdFunction only emits it when non-zero).
    llvm::APSInt Alignment;
  };
} // namespace
11095 
11096 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11097                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11098   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11099   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11100   // of that clause. The VLEN value must be power of 2.
11101   // In other case the notion of the function`s "characteristic data type" (CDT)
11102   // is used to compute the vector length.
11103   // CDT is defined in the following order:
11104   //   a) For non-void function, the CDT is the return type.
11105   //   b) If the function has any non-uniform, non-linear parameters, then the
11106   //   CDT is the type of the first such parameter.
11107   //   c) If the CDT determined by a) or b) above is struct, union, or class
11108   //   type which is pass-by-value (except for the type that maps to the
11109   //   built-in complex data type), the characteristic data type is int.
11110   //   d) If none of the above three cases is applicable, the CDT is int.
11111   // The VLEN is then determined based on the CDT and the size of vector
11112   // register of that ISA for which current vector version is generated. The
11113   // VLEN is computed using the formula below:
11114   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11115   // where vector register size specified in section 3.2.1 Registers and the
11116   // Stack Frame of original AMD64 ABI document.
11117   QualType RetType = FD->getReturnType();
11118   if (RetType.isNull())
11119     return 0;
11120   ASTContext &C = FD->getASTContext();
11121   QualType CDT;
11122   if (!RetType.isNull() && !RetType->isVoidType()) {
11123     CDT = RetType;
11124   } else {
11125     unsigned Offset = 0;
11126     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11127       if (ParamAttrs[Offset].Kind == Vector)
11128         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11129       ++Offset;
11130     }
11131     if (CDT.isNull()) {
11132       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11133         if (ParamAttrs[I + Offset].Kind == Vector) {
11134           CDT = FD->getParamDecl(I)->getType();
11135           break;
11136         }
11137       }
11138     }
11139   }
11140   if (CDT.isNull())
11141     CDT = C.IntTy;
11142   CDT = CDT->getCanonicalTypeUnqualified();
11143   if (CDT->isRecordType() || CDT->isUnionType())
11144     CDT = C.IntTy;
11145   return C.getTypeSize(CDT);
11146 }
11147 
11148 static void
11149 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11150                            const llvm::APSInt &VLENVal,
11151                            ArrayRef<ParamAttrTy> ParamAttrs,
11152                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11153   struct ISADataTy {
11154     char ISA;
11155     unsigned VecRegSize;
11156   };
11157   ISADataTy ISAData[] = {
11158       {
11159           'b', 128
11160       }, // SSE
11161       {
11162           'c', 256
11163       }, // AVX
11164       {
11165           'd', 256
11166       }, // AVX2
11167       {
11168           'e', 512
11169       }, // AVX512
11170   };
11171   llvm::SmallVector<char, 2> Masked;
11172   switch (State) {
11173   case OMPDeclareSimdDeclAttr::BS_Undefined:
11174     Masked.push_back('N');
11175     Masked.push_back('M');
11176     break;
11177   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11178     Masked.push_back('N');
11179     break;
11180   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11181     Masked.push_back('M');
11182     break;
11183   }
11184   for (char Mask : Masked) {
11185     for (const ISADataTy &Data : ISAData) {
11186       SmallString<256> Buffer;
11187       llvm::raw_svector_ostream Out(Buffer);
11188       Out << "_ZGV" << Data.ISA << Mask;
11189       if (!VLENVal) {
11190         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11191         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11192         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11193       } else {
11194         Out << VLENVal;
11195       }
11196       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11197         switch (ParamAttr.Kind){
11198         case LinearWithVarStride:
11199           Out << 's' << ParamAttr.StrideOrArg;
11200           break;
11201         case Linear:
11202           Out << 'l';
11203           if (ParamAttr.StrideOrArg != 1)
11204             Out << ParamAttr.StrideOrArg;
11205           break;
11206         case Uniform:
11207           Out << 'u';
11208           break;
11209         case Vector:
11210           Out << 'v';
11211           break;
11212         }
11213         if (!!ParamAttr.Alignment)
11214           Out << 'a' << ParamAttr.Alignment;
11215       }
11216       Out << '_' << Fn->getName();
11217       Fn->addFnAttr(Out.str());
11218     }
11219   }
11220 }
11221 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11227 
11228 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11229 ///
11230 /// TODO: Need to implement the behavior for reference marked with a
11231 /// var or no linear modifiers (1.b in the section). For this, we
11232 /// need to extend ParamKindTy to support the linear modifiers.
11233 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11234   QT = QT.getCanonicalType();
11235 
11236   if (QT->isVoidType())
11237     return false;
11238 
11239   if (Kind == ParamKindTy::Uniform)
11240     return false;
11241 
11242   if (Kind == ParamKindTy::Linear)
11243     return false;
11244 
11245   // TODO: Handle linear references with modifiers
11246 
11247   if (Kind == ParamKindTy::LinearWithVarStride)
11248     return false;
11249 
11250   return true;
11251 }
11252 
11253 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11254 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11255   QT = QT.getCanonicalType();
11256   unsigned Size = C.getTypeSize(QT);
11257 
11258   // Only scalars and complex within 16 bytes wide set PVB to true.
11259   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11260     return false;
11261 
11262   if (QT->isFloatingType())
11263     return true;
11264 
11265   if (QT->isIntegerType())
11266     return true;
11267 
11268   if (QT->isPointerType())
11269     return true;
11270 
11271   // TODO: Add support for complex types (section 3.1.2, item 2).
11272 
11273   return false;
11274 }
11275 
11276 /// Computes the lane size (LS) of a return type or of an input parameter,
11277 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11278 /// TODO: Add support for references, section 3.2.1, item 1.
11279 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11280   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11281     QualType PTy = QT.getCanonicalType()->getPointeeType();
11282     if (getAArch64PBV(PTy, C))
11283       return C.getTypeSize(PTy);
11284   }
11285   if (getAArch64PBV(QT, C))
11286     return C.getTypeSize(QT);
11287 
11288   return C.getTypeSize(C.getUIntPtrType());
11289 }
11290 
11291 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11292 // signature of the scalar function, as defined in 3.2.2 of the
11293 // AAVFABI.
11294 static std::tuple<unsigned, unsigned, bool>
11295 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11296   QualType RetType = FD->getReturnType().getCanonicalType();
11297 
11298   ASTContext &C = FD->getASTContext();
11299 
11300   bool OutputBecomesInput = false;
11301 
11302   llvm::SmallVector<unsigned, 8> Sizes;
11303   if (!RetType->isVoidType()) {
11304     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11305     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11306       OutputBecomesInput = true;
11307   }
11308   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11309     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11310     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11311   }
11312 
11313   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11314   // The LS of a function parameter / return value can only be a power
11315   // of 2, starting from 8 bits, up to 128.
11316   assert(std::all_of(Sizes.begin(), Sizes.end(),
11317                      [](unsigned Size) {
11318                        return Size == 8 || Size == 16 || Size == 32 ||
11319                               Size == 64 || Size == 128;
11320                      }) &&
11321          "Invalid size");
11322 
11323   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11324                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11325                          OutputBecomesInput);
11326 }
11327 
11328 /// Mangle the parameter part of the vector function name according to
11329 /// their OpenMP classification. The mangling function is defined in
11330 /// section 3.5 of the AAVFABI.
11331 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11332   SmallString<256> Buffer;
11333   llvm::raw_svector_ostream Out(Buffer);
11334   for (const auto &ParamAttr : ParamAttrs) {
11335     switch (ParamAttr.Kind) {
11336     case LinearWithVarStride:
11337       Out << "ls" << ParamAttr.StrideOrArg;
11338       break;
11339     case Linear:
11340       Out << 'l';
11341       // Don't print the step value if it is not present or if it is
11342       // equal to 1.
11343       if (ParamAttr.StrideOrArg != 1)
11344         Out << ParamAttr.StrideOrArg;
11345       break;
11346     case Uniform:
11347       Out << 'u';
11348       break;
11349     case Vector:
11350       Out << 'v';
11351       break;
11352     }
11353 
11354     if (!!ParamAttr.Alignment)
11355       Out << 'a' << ParamAttr.Alignment;
11356   }
11357 
11358   return std::string(Out.str());
11359 }
11360 
11361 // Function used to add the attribute. The parameter `VLEN` is
11362 // templated to allow the use of "x" when targeting scalable functions
11363 // for SVE.
11364 template <typename T>
11365 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11366                                  char ISA, StringRef ParSeq,
11367                                  StringRef MangledName, bool OutputBecomesInput,
11368                                  llvm::Function *Fn) {
11369   SmallString<256> Buffer;
11370   llvm::raw_svector_ostream Out(Buffer);
11371   Out << Prefix << ISA << LMask << VLEN;
11372   if (OutputBecomesInput)
11373     Out << "v";
11374   Out << ParSeq << "_" << MangledName;
11375   Fn->addFnAttr(Out.str());
11376 }
11377 
11378 // Helper function to generate the Advanced SIMD names depending on
11379 // the value of the NDS when simdlen is not present.
11380 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11381                                       StringRef Prefix, char ISA,
11382                                       StringRef ParSeq, StringRef MangledName,
11383                                       bool OutputBecomesInput,
11384                                       llvm::Function *Fn) {
11385   switch (NDS) {
11386   case 8:
11387     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11388                          OutputBecomesInput, Fn);
11389     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11390                          OutputBecomesInput, Fn);
11391     break;
11392   case 16:
11393     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11394                          OutputBecomesInput, Fn);
11395     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11396                          OutputBecomesInput, Fn);
11397     break;
11398   case 32:
11399     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11400                          OutputBecomesInput, Fn);
11401     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11402                          OutputBecomesInput, Fn);
11403     break;
11404   case 64:
11405   case 128:
11406     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11407                          OutputBecomesInput, Fn);
11408     break;
11409   default:
11410     llvm_unreachable("Scalar type is too wide.");
11411   }
11412 }
11413 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN value of the `simdlen` clause, or 0 when absent.
/// \param State value of the `[not]inbranch` clause on the directive.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE.
/// \param SLoc location used for any diagnostics about `simdlen`.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures: warn and emit
  // nothing.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits (total vector width in [128, 2048] bits, multiple of 128).
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both the unmasked ("N") and
        // masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length ("x").
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The vector lengths are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11522 
11523 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11524                                               llvm::Function *Fn) {
11525   ASTContext &C = CGM.getContext();
11526   FD = FD->getMostRecentDecl();
11527   // Map params to their positions in function decl.
11528   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11529   if (isa<CXXMethodDecl>(FD))
11530     ParamPositions.try_emplace(FD, 0);
11531   unsigned ParamPos = ParamPositions.size();
11532   for (const ParmVarDecl *P : FD->parameters()) {
11533     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11534     ++ParamPos;
11535   }
11536   while (FD) {
11537     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11538       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11539       // Mark uniform parameters.
11540       for (const Expr *E : Attr->uniforms()) {
11541         E = E->IgnoreParenImpCasts();
11542         unsigned Pos;
11543         if (isa<CXXThisExpr>(E)) {
11544           Pos = ParamPositions[FD];
11545         } else {
11546           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11547                                 ->getCanonicalDecl();
11548           Pos = ParamPositions[PVD];
11549         }
11550         ParamAttrs[Pos].Kind = Uniform;
11551       }
11552       // Get alignment info.
11553       auto NI = Attr->alignments_begin();
11554       for (const Expr *E : Attr->aligneds()) {
11555         E = E->IgnoreParenImpCasts();
11556         unsigned Pos;
11557         QualType ParmTy;
11558         if (isa<CXXThisExpr>(E)) {
11559           Pos = ParamPositions[FD];
11560           ParmTy = E->getType();
11561         } else {
11562           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11563                                 ->getCanonicalDecl();
11564           Pos = ParamPositions[PVD];
11565           ParmTy = PVD->getType();
11566         }
11567         ParamAttrs[Pos].Alignment =
11568             (*NI)
11569                 ? (*NI)->EvaluateKnownConstInt(C)
11570                 : llvm::APSInt::getUnsigned(
11571                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11572                           .getQuantity());
11573         ++NI;
11574       }
11575       // Mark linear parameters.
11576       auto SI = Attr->steps_begin();
11577       auto MI = Attr->modifiers_begin();
11578       for (const Expr *E : Attr->linears()) {
11579         E = E->IgnoreParenImpCasts();
11580         unsigned Pos;
11581         // Rescaling factor needed to compute the linear parameter
11582         // value in the mangled name.
11583         unsigned PtrRescalingFactor = 1;
11584         if (isa<CXXThisExpr>(E)) {
11585           Pos = ParamPositions[FD];
11586         } else {
11587           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11588                                 ->getCanonicalDecl();
11589           Pos = ParamPositions[PVD];
11590           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11591             PtrRescalingFactor = CGM.getContext()
11592                                      .getTypeSizeInChars(P->getPointeeType())
11593                                      .getQuantity();
11594         }
11595         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11596         ParamAttr.Kind = Linear;
11597         // Assuming a stride of 1, for `linear` without modifiers.
11598         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11599         if (*SI) {
11600           Expr::EvalResult Result;
11601           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11602             if (const auto *DRE =
11603                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11604               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11605                 ParamAttr.Kind = LinearWithVarStride;
11606                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11607                     ParamPositions[StridePVD->getCanonicalDecl()]);
11608               }
11609             }
11610           } else {
11611             ParamAttr.StrideOrArg = Result.Val.getInt();
11612           }
11613         }
11614         // If we are using a linear clause on a pointer, we need to
11615         // rescale the value of linear_step with the byte size of the
11616         // pointee type.
11617         if (Linear == ParamAttr.Kind)
11618           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11619         ++SI;
11620         ++MI;
11621       }
11622       llvm::APSInt VLENVal;
11623       SourceLocation ExprLoc;
11624       const Expr *VLENExpr = Attr->getSimdlen();
11625       if (VLENExpr) {
11626         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11627         ExprLoc = VLENExpr->getExprLoc();
11628       }
11629       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11630       if (CGM.getTriple().isX86()) {
11631         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11632       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11633         unsigned VLEN = VLENVal.getExtValue();
11634         StringRef MangledName = Fn->getName();
11635         if (CGM.getTarget().hasFeature("sve"))
11636           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11637                                          MangledName, 's', 128, Fn, ExprLoc);
11638         if (CGM.getTarget().hasFeature("neon"))
11639           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11640                                          MangledName, 'n', 128, Fn, ExprLoc);
11641       }
11642     }
11643     FD = FD->getPreviousDecl();
11644   }
11645 }
11646 
namespace {
/// Cleanup action for doacross support.
///
/// Captures the __kmpc_doacross_fini callee and its two arguments
/// (ident_t *loc, kmp_int32 gtid) when pushed, and emits the call when
/// the cleanup fires on region exit.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments of the finalization runtime call.
  static const int DoacrossFinArgs = 2;

private:
  // Runtime function to invoke (__kmpc_doacross_fini).
  llvm::FunctionCallee RTLFn;
  // Call arguments captured at push time.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit when the builder has no valid insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11671 
/// Emits the doacross-loop initialization: builds an on-stack array of
/// kmp_dim descriptors (one per loop dimension) and calls
/// __kmpc_doacross_init; a cleanup emitting __kmpc_doacross_fini is
/// pushed onto the EH stack so finalization runs on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, then cache in KmpDimTy) the runtime's dimension record.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the array, which leaves every `lo` field at 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini for both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11742 
11743 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11744                                           const OMPDependClause *C) {
11745   QualType Int64Ty =
11746       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11747   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11748   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11749       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11750   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11751   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11752     const Expr *CounterVal = C->getLoopData(I);
11753     assert(CounterVal);
11754     llvm::Value *CntVal = CGF.EmitScalarConversion(
11755         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11756         CounterVal->getExprLoc());
11757     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11758                           /*Volatile=*/false, Int64Ty);
11759   }
11760   llvm::Value *Args[] = {
11761       emitUpdateLocation(CGF, C->getBeginLoc()),
11762       getThreadID(CGF, C->getBeginLoc()),
11763       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11764   llvm::FunctionCallee RTLFn;
11765   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11766     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11767                                                   OMPRTL___kmpc_doacross_post);
11768   } else {
11769     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11770     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11771                                                   OMPRTL___kmpc_doacross_wait);
11772   }
11773   CGF.EmitRuntimeCall(RTLFn, Args);
11774 }
11775 
11776 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11777                                llvm::FunctionCallee Callee,
11778                                ArrayRef<llvm::Value *> Args) const {
11779   assert(Loc.isValid() && "Outlined function call location must be valid.");
11780   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11781 
11782   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11783     if (Fn->doesNotThrow()) {
11784       CGF.EmitNounwindRuntimeCall(Fn, Args);
11785       return;
11786     }
11787   }
11788   CGF.EmitRuntimeCall(Callee, Args);
11789 }
11790 
/// Emits a call to an outlined OpenMP function. Default implementation
/// simply forwards to emitCall; device-specific runtime classes may
/// override this hook — TODO confirm against the class declaration.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11796 
11797 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11798   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11799     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11800       HasEmittedDeclareTargetRegion = true;
11801 }
11802 
/// Returns the address of \p NativeParam. In this default implementation
/// the native parameter's own local storage is used and \p TargetParam is
/// ignored; presumably device runtimes override this to map between the
/// two — TODO confirm against the class declaration.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11808 
/// Returns the address to be used for local variable \p VD, honoring
/// untied-task local storage and the OMPAllocateDeclAttr attribute
/// (`omp allocate`). Returns Address::invalid() when the caller should
/// fall back to default allocation.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function belongs to an untied task, the variable may
  // have addresses recorded on the untied-locals stack.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-sized type: round the size up to the alignment now.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Allocate the storage through __kmpc_alloc and cast the raw pointer
    // to the variable's type.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Calls __kmpc_free(gtid, addr, allocator) when the cleanup fires.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    // Free the storage when the scope is exited (normal or EH path).
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11911 
11912 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11913                                              const VarDecl *VD) const {
11914   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11915   if (It == FunctionToUntiedTaskStackMap.end())
11916     return false;
11917   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11918 }
11919 
/// Pushes a set of all declarations referenced by the directive's
/// `nontemporal` clauses; the destructor pops it again.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Only push a scope when the directive actually carries nontemporal
  // clauses.
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise this must be a member of the current class, accessed
        // through (a possibly implicit) `this`.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11945 
11946 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11947   if (!NeedToPush)
11948     return;
11949   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11950 }
11951 
11952 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11953     CodeGenFunction &CGF,
11954     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11955                          std::pair<Address, Address>> &LocalVars)
11956     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11957   if (!NeedToPush)
11958     return;
11959   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11960       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11961   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11962 }
11963 
11964 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11965   if (!NeedToPush)
11966     return;
11967   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11968 }
11969 
11970 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11971   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11972 
11973   return llvm::any_of(
11974       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11975       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11976 }
11977 
11978 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11979     const OMPExecutableDirective &S,
11980     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11981     const {
11982   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11983   // Vars in target/task regions must be excluded completely.
11984   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11985       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11986     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11987     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11988     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11989     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11990       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11991         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11992     }
11993   }
11994   // Exclude vars in private clauses.
11995   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11996     for (const Expr *Ref : C->varlists()) {
11997       if (!Ref->getType()->isScalarType())
11998         continue;
11999       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12000       if (!DRE)
12001         continue;
12002       NeedToCheckForLPCs.insert(DRE->getDecl());
12003     }
12004   }
12005   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12006     for (const Expr *Ref : C->varlists()) {
12007       if (!Ref->getType()->isScalarType())
12008         continue;
12009       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12010       if (!DRE)
12011         continue;
12012       NeedToCheckForLPCs.insert(DRE->getDecl());
12013     }
12014   }
12015   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12016     for (const Expr *Ref : C->varlists()) {
12017       if (!Ref->getType()->isScalarType())
12018         continue;
12019       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12020       if (!DRE)
12021         continue;
12022       NeedToCheckForLPCs.insert(DRE->getDecl());
12023     }
12024   }
12025   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12026     for (const Expr *Ref : C->varlists()) {
12027       if (!Ref->getType()->isScalarType())
12028         continue;
12029       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12030       if (!DRE)
12031         continue;
12032       NeedToCheckForLPCs.insert(DRE->getDecl());
12033     }
12034   }
12035   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12036     for (const Expr *Ref : C->varlists()) {
12037       if (!Ref->getType()->isScalarType())
12038         continue;
12039       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12040       if (!DRE)
12041         continue;
12042       NeedToCheckForLPCs.insert(DRE->getDecl());
12043     }
12044   }
12045   for (const Decl *VD : NeedToCheckForLPCs) {
12046     for (const LastprivateConditionalData &Data :
12047          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12048       if (Data.DeclToUniqueName.count(VD) > 0) {
12049         if (!Data.Disabled)
12050           NeedToAddForLPCsAsDisabled.insert(VD);
12051         break;
12052       }
12053     }
12054   }
12055 }
12056 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push a tracking frame only when OpenMP >= 5.0 and the directive
      // carries at least one 'lastprivate(conditional: ...)' clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record each conditional-lastprivate decl together with the unique name
  // used for its "last value" global variable.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the owning function so updates
  // emitted later can find this frame.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12088 
12089 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12090     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12091     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12092   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12093   if (CGM.getLangOpts().OpenMP < 50)
12094     return;
12095   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12096   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12097   if (!NeedToAddForLPCsAsDisabled.empty()) {
12098     Action = ActionToDo::DisableLastprivateConditional;
12099     LastprivateConditionalData &Data =
12100         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12101     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12102       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12103     Data.Fn = CGF.CurFn;
12104     Data.Disabled = true;
12105   }
12106 }
12107 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Delegate to the (CGF, S) constructor, which only pushes a "disabled"
  // frame when some tracked decls are privatized in this region.
  return LastprivateConditionalRAII(CGF, S);
}
12113 
12114 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12115   if (CGM.getLangOpts().OpenMP < 50)
12116     return;
12117   if (Action == ActionToDo::DisableLastprivateConditional) {
12118     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12119            "Expected list of disabled private vars.");
12120     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12121   }
12122   if (Action == ActionToDo::PushAsLastprivateConditional) {
12123     assert(
12124         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12125         "Expected list of lastprivate conditional vars.");
12126     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12127   }
12128 }
12129 
/// Creates (or reuses) the per-function wrapper record
/// { <VD's type> value; char Fired; } used to track whether a lastprivate
/// conditional variable was written by an inner region, resets the Fired flag
/// to 0, and returns the address of the value field, which serves as the
/// private copy of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time VD is seen in this function: build the implicit record type
    // and a stack temporary of that type, and cache both.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the record/temporary created earlier for VD.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: the variable has not been updated yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12164 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of lastprivate conditional frames to search (innermost last).
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Expression that referenced the tracked variable, if any.
  const Expr *FoundE = nullptr;
  /// Canonical declaration of the tracked variable that was referenced.
  const Decl *FoundD = nullptr;
  /// Unique name of the "last value" global for the found variable.
  StringRef UniqueDeclName;
  /// Loop iteration variable of the frame that tracks the found variable.
  LValue IVLVal;
  /// Function that owns the frame that tracks the found variable.
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  /// Matches a plain variable reference against the tracked decls, searching
  /// innermost frame first; a hit in a disabled frame suppresses the match.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same as VisitDeclRefExpr, but for members of the current class
  /// accessed through 'this'.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Recurses into glvalue children only; prvalue subexpressions cannot name
  /// a lastprivate conditional variable being written.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns what was found: (expr, canonical decl, unique name, IV lvalue,
  /// owning function). All members are default values if nothing matched.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12235 
/// Emits the conditional update of the global "last value"/"last iteration"
/// pair for a lastprivate conditional variable:
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// The update is wrapped in a named critical section unless compiling in
/// simd-only mode, where no parallel regions can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12322 
/// If \p LHS references a variable tracked by an enclosing lastprivate
/// conditional frame, records its potential update: in the frame's own
/// function the "last value" globals are updated directly; in an inner
/// (outlined) region only the Fired flag of the outer wrapper struct is set.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: the flag may be raised concurrently by several threads of
    // the inner parallel region.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12365 
12366 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12367     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12368     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12369   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12370     return;
12371   auto Range = llvm::reverse(LastprivateConditionalStack);
12372   auto It = llvm::find_if(
12373       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12374   if (It == Range.end() || It->Fn != CGF.CurFn)
12375     return;
12376   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12377   assert(LPCI != LastprivateConditionalToTypes.end() &&
12378          "Lastprivates must be registered already.");
12379   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12380   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12381   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12382   for (const auto &Pair : It->DeclToUniqueName) {
12383     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12384     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12385       continue;
12386     auto I = LPCI->getSecond().find(Pair.first);
12387     assert(I != LPCI->getSecond().end() &&
12388            "Lastprivate must be rehistered already.");
12389     // bool Cmp = priv_a.Fired != 0;
12390     LValue BaseLVal = std::get<3>(I->getSecond());
12391     LValue FiredLVal =
12392         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12393     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12394     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12395     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12396     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12397     // if (Cmp) {
12398     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12399     CGF.EmitBlock(ThenBB);
12400     Address Addr = CGF.GetAddrOfLocalVar(VD);
12401     LValue LVal;
12402     if (VD->getType()->isReferenceType())
12403       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12404                                            AlignmentSource::Decl);
12405     else
12406       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12407                                 AlignmentSource::Decl);
12408     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12409                                      D.getBeginLoc());
12410     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12411     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12412     // }
12413   }
12414 }
12415 
12416 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12417     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12418     SourceLocation Loc) {
12419   if (CGF.getLangOpts().OpenMP < 50)
12420     return;
12421   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12422   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12423          "Unknown lastprivate conditional variable.");
12424   StringRef UniqueName = It->second;
12425   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12426   // The variable was not updated in the region - exit.
12427   if (!GV)
12428     return;
12429   LValue LPLVal = CGF.MakeAddrLValue(
12430       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12431   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12432   CGF.EmitStoreOfScalar(Res, PrivLVal);
12433 }
12434 
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outlining a parallel region needs the full OpenMP runtime; unreachable in
  // SIMD-only (-fopenmp-simd) mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12440 
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Teams outlining needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12446 
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Task outlining needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12454 
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  // Parallel calls need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12462 
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  // Critical regions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12469 
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  // Master regions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12475 
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  // 'taskyield' needs the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12480 
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  // 'taskgroup' needs the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12486 
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  // 'single' regions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12494 
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  // 'ordered' regions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12501 
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  // Barriers need the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12509 
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Dynamic-dispatch loop init needs the full OpenMP runtime; unreachable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12516 
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  // Static loop init needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12522 
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  // 'distribute' init needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12528 
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  // Ordered-iteration bookkeeping needs the full OpenMP runtime; unreachable
  // in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12535 
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  // Static loop finish needs the full OpenMP runtime; unreachable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12541 
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  // Dynamic loop chunk fetch needs the full OpenMP runtime; unreachable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12549 
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  // 'num_threads' needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12555 
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  // 'proc_bind' needs the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12561 
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  // 'threadprivate' needs the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12568 
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  // 'threadprivate' definitions need the full OpenMP runtime; unreachable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12574 
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  // Artificial threadprivates need the full OpenMP runtime; unreachable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12579 
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  // 'flush' needs the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12586 
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Task calls need the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12595 
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  // 'taskloop' needs the full OpenMP runtime; unreachable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12602 
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only the runtime-free "simple" reduction form can occur in SIMD-only
  // mode; delegate it to the base implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12611 
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Task reductions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12617 
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  // Task reductions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12623 
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  // Task reductions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12630 
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  // Task reductions need the full OpenMP runtime; unreachable in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12637 
// 'taskwait' lowers to an OpenMP runtime call and is therefore unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12642 
// 'cancellation point' requires OpenMP runtime support; unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12648 
// 'cancel' requires OpenMP runtime support; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12654 
// Outlining a 'target' region is part of device offloading, which SIMD-only
// mode does not support.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12661 
// Launching a 'target' region needs the offloading runtime; unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12671 
// Device-side emission of target functions is never performed in SIMD-only
// mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12675 
// Device-side emission of target global variables is never performed in
// SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12679 
// Unlike the other target hooks this is not unreachable: returning false
// reports that the declaration was not consumed as a target global
// (presumably letting normal host code generation handle it — confirm
// against callers).
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12683 
// 'teams' lowers to OpenMP runtime calls; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12691 
// 'num_teams'/'thread_limit' clauses require runtime support; unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12698 
// 'target data' mapping requires the offloading runtime; unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12704 
// Stand-alone target data directives ('target enter/exit data', 'target
// update') require the offloading runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12710 
// Doacross (ordered-loop dependence) initialization requires the OpenMP
// runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12716 
// Doacross 'ordered depend' handling requires the OpenMP runtime;
// unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12721 
// Parameter translation is used for target-region argument passing; never
// needed in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12727 
// Companion to translateParameter for target-region arguments; never needed
// in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12734