xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision d56accc7c3dcc897489b6a07834763a03b9f3d68)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that have an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without their own captured statement (e.g.
  /// inlined regions that reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task scheduling point for untied tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region may be exited via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any CGCapturedStmtInfo with kind CR_OpenMP is one of
  /// the CGOpenMPRegionInfo subclasses.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    // Outlined parallel regions always receive a thread-id parameter.
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region of kind ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated outlined helper function.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the part-id state machine for untied
  /// tasks: on entry a switch over the stored part id resumes the task at
  /// the correct scheduling point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the task's part-id counter.
    const VarDecl *PartIDVar;
    /// Extra codegen run at every scheduling point (untied tasks only).
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        // Default destination exits the task through the cleanup path.
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very start of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one scheduling point: store the next part id, run the untied
    /// codegen hook, branch out through the return block, and register a
    /// switch case that resumes execution right after this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next unused case number doubles as the next part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts (switch cases) generated so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate scheduling-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region of kind TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing outlined
/// region (if any) because inlined regions do not capture on their own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this region was
  /// pushed; restored by InlinedOpenMPRegionRAII on destruction.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: an OpenMP region of kind InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region, or null if the previous info was
  /// not an OpenMP region (or absent).
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region of kind TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name for the outlined target helper.
  StringRef HelperName;
};
344 
/// Placeholder region-codegen callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
348 /// API for generation of expressions captured in a innermost OpenMP
349 /// region.
350 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
351 public:
352   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
353       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
354                                   OMPD_unknown,
355                                   /*HasCancel=*/false),
356         PrivScope(CGF) {
357     // Make sure the globals captured in the provided statement are local by
358     // using the privatization logic. We assume the same variable is not
359     // captured more than once.
360     for (const auto &C : CS.captures()) {
361       if (!C.capturesVariable() && !C.capturesVariableByCopy())
362         continue;
363 
364       const VarDecl *VD = C.getCapturedVar();
365       if (VD->isLocalVarDeclOrParm())
366         continue;
367 
368       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
369                       /*RefersToEnclosingVariableOrCapture=*/false,
370                       VD->getType().getNonReferenceType(), VK_LValue,
371                       C.getLocation());
372       PrivScope.addPrivate(
373           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
374     }
375     (void)PrivScope.Privatize();
376   }
377 
378   /// Lookup the captured field decl for a variable.
379   const FieldDecl *lookup(const VarDecl *VD) const override {
380     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
381       return FD;
382     return nullptr;
383   }
384 
385   /// Emit the captured statement body.
386   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
387     llvm_unreachable("No body for expressions");
388   }
389 
390   /// Get a variable or parameter for storing global thread id
391   /// inside OpenMP construct.
392   const VarDecl *getThreadIDVariable() const override {
393     llvm_unreachable("No thread id for expressions");
394   }
395 
396   /// Get the name of the capture helper.
397   StringRef getHelperName() const override {
398     llvm_unreachable("No helper name for expressions");
399   }
400 
401   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
402 
403 private:
404   /// Private scope to capture global variables.
405   CodeGenFunction::OMPPrivateScope PrivScope;
406 };
407 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved CGF state, restored in the destructor when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, temporarily clear the lambda-capture and
  /// block state of \p CGF so the inlined region does not inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash and clear the lambda/block capture state.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Special device IDs reserved by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook when the cleanup fires. Skipped when there
  /// is no valid insertion point (the code is unreachable).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
592 
593 } // anonymous namespace
594 
/// Invoke the stored codegen callback inside its own cleanup scope. If a
/// pre/post action is attached, its Exit hook is registered as a cleanup so
/// it also runs on exceptional exits from the region.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No attached action: use a default-constructed (no-op) one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Initialize the reduction private copy \p Private from \p Original. When
/// the 'declare reduction' decl \p DRD carries an initializer, \p InitOp is
/// evaluated with the initializer's two operands bound to Private/Original;
/// otherwise Private is filled from a null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is a call through an opaque value; its first argument refers to
    // the private copy, the second to the original variable.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Bind the referenced decls to the actual private/original addresses.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee and emit the
    // call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a zero-initialized global of type Ty
    // and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global via an lvalue mapping.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex values are stored through a prvalue mapping.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, elements are initialized via the
/// 'declare reduction' initializer instead of plain expression emission.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' decl, if any; when present, SrcAddr is
/// walked element-wise alongside DestAddr.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (DRD only) and destination elements.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764 
/// Emit an lvalue for the shared (original) copy of a reduction item,
/// delegating to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
768 
/// Emit an lvalue for the upper bound of a reduction item. Only array
/// sections carry a distinct upper bound; for any other expression form an
/// invalid (default-constructed) LValue is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
775 
776 void ReductionCodeGen::emitAggregateInitialization(
777     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
778     const OMPDeclareReductionDecl *DRD) {
779   // Emit VarDecl with copy init for arrays.
780   // Get the address of the original variable captured in current
781   // captured region.
782   const auto *PrivateVD =
783       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
784   bool EmitDeclareReductionInit =
785       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
786   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
787                        EmitDeclareReductionInit,
788                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
789                                                 : PrivateVD->getInit(),
790                        DRD, SharedAddr);
791 }
792 
793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794                                    ArrayRef<const Expr *> Origs,
795                                    ArrayRef<const Expr *> Privates,
796                                    ArrayRef<const Expr *> ReductionOps) {
797   ClausesData.reserve(Shareds.size());
798   SharedAddresses.reserve(Shareds.size());
799   Sizes.reserve(Shareds.size());
800   BaseDecls.reserve(Shareds.size());
801   const auto *IOrig = Origs.begin();
802   const auto *IPriv = Privates.begin();
803   const auto *IRed = ReductionOps.begin();
804   for (const Expr *Ref : Shareds) {
805     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806     std::advance(IOrig, 1);
807     std::advance(IPriv, 1);
808     std::advance(IRed, 1);
809   }
810 }
811 
812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814          "Number of generated lvalues must be exactly N.");
815   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817   SharedAddresses.emplace_back(First, Second);
818   if (ClausesData[N].Shared == ClausesData[N].Ref) {
819     OrigAddresses.emplace_back(First, Second);
820   } else {
821     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823     OrigAddresses.emplace_back(First, Second);
824   }
825 }
826 
/// Compute and record the size of reduction item \p N in Sizes.
/// Fixed-size items record only the constant byte size (element count is
/// null). Variably-modified items compute both the byte size and the element
/// count as runtime values, then bind the VLA size expression so the private
/// type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the byte size is known from the type.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole item: bytes come from the type; derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA's size expression to the computed count while emitting the
  // variably-modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
862 
/// Emit the variably-modified private type of reduction item \p N using an
/// externally supplied element count \p Size. No-op for fixed-size items,
/// where \p Size must be null (no element count was ever recorded for them).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size for the duration of the emit.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
881 
/// Emit initialization of the private copy of reduction item \p N.
/// Strategy, in order:
///  - array types: per-element init via emitAggregateInitialization;
///  - a 'declare reduction' initializer (or a private variable with no
///    initializer of its own under a UDR): emitInitWithReductionInitializer;
///  - otherwise the private variable's own non-trivial initializer, unless
///    \p DefaultInit reports (by returning true) that it already handled it.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type the private address to the private copy's memory representation.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Note: DefaultInit is still run first when a UDR initializer exists.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
909 
910 bool ReductionCodeGen::needCleanups(unsigned N) {
911   const auto *PrivateVD =
912       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913   QualType PrivateType = PrivateVD->getType();
914   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915   return DTorKind != QualType::DK_none;
916 }
917 
918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919                                     Address PrivateAddr) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   if (needCleanups(N)) {
925     PrivateAddr = CGF.Builder.CreateElementBitCast(
926         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928   }
929 }
930 
/// Load through the pointer/reference layers of \p BaseLV (typed \p BaseTy)
/// until the pointee type matches \p ElTy, then return an lvalue at that
/// position re-cast to \p ElTy's memory representation. Base info and TBAA of
/// the original lvalue are carried over for the resulting subobject access.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference layer: materialize a reference lvalue and load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
950 
/// Rebuild the chain of indirection needed so \p Addr (data of type \p ElTy)
/// can be reached through a variable of type \p BaseTy: one stack temporary
/// per pointer/reference layer, each storing the address of the next. The
/// innermost temporary receives \p Addr; the outermost is returned. When no
/// layers are needed, \p Addr itself is returned with \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // temporary created one layer up
  Address MostTopTmp = Address::invalid(); // outermost temporary (result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the (possibly casted) address into the innermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
978 
979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980   const VarDecl *OrigVD = nullptr;
981   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984       Base = TempOASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992       Base = TempASE->getBase()->IgnoreParenImpCasts();
993     DE = cast<DeclRefExpr>(Base);
994     OrigVD = cast<VarDecl>(DE->getDecl());
995   }
996   return OrigVD;
997 }
998 
/// Adjust \p PrivateAddr so that accesses expressed in terms of the original
/// base variable map onto the private copy. For array sections/subscripts,
/// the private pointer is shifted by the element distance between the shared
/// item and its base, then re-wrapped in the base's pointer/reference
/// structure (castToBase). Otherwise the address is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Base variable lvalue, dereferenced down to the item's element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Element distance from the shared item to the base start; applied to the
    // private pointer to reconstruct the private "base" address.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1026 
/// Returns true if reduction item \p N is initialized via a user-defined
/// 'declare reduction' initializer clause.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
1032 
/// The thread id of an outlined region is passed as a kmp_int32 * parameter
/// (see the assertion in emitParallelOrTeamsOutlinedFunction), so load
/// through the pointer to form the lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1038 
/// Emit the body of an OpenMP region inside a terminate scope, so that
/// exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1053 
/// For tasks the thread id variable is a plain kmp_int32, not a pointer
/// (see the assertion in emitTaskOutlinedFunction), so its local address can
/// be used directly as an lvalue.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1060 
/// Append a public, non-bitfield, non-mutable field of type \p FieldTy to the
/// record/context \p DC and return the new FieldDecl.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
1071 
/// Construct the OpenMP runtime support object: set up the kmp_critical_name
/// type ([8 x i32]), initialize the OpenMPIRBuilder's cached types, and load
/// any offloading metadata from the host IR (device compilation).
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1082 
1083 void CGOpenMPRuntime::clear() {
1084   InternalVars.clear();
1085   // Clean non-target variable declarations possibly used only in debug info.
1086   for (const auto &Data : EmittedNonTargetVariables) {
1087     if (!Data.getValue().pointsToAliveValue())
1088       continue;
1089     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090     if (!GV)
1091       continue;
1092     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093       continue;
1094     GV->eraseFromParent();
1095   }
1096 }
1097 
1098 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099   SmallString<128> Buffer;
1100   llvm::raw_svector_ostream OS(Buffer);
1101   StringRef Sep = FirstSeparator;
1102   for (StringRef Part : Parts) {
1103     OS << Sep << Part;
1104     Sep = Separator;
1105   }
1106   return std::string(OS.str());
1107 }
1108 
/// Emit the helper function for a 'declare reduction' combiner or
/// initializer:
///   void .omp_combiner.(Ty *out, Ty *in);   // combiner
///   void .omp_initializer.(Ty *out, Ty *in); // initializer
/// \p In and \p Out are the user's omp_in/omp_out (or omp_orig/omp_priv)
/// declarations; inside the helper they are privatized to the dereferenced
/// parameters. For an initializer without a call-style init expression,
/// \p CombinerInitializer is null and \p Out's own initializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; force-inline them under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer without a call-style expression: emit omp_priv's own
    // non-trivial initializer into the privatized out parameter.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165 
/// Emit (once) the combiner and, if present, the initializer function for the
/// 'declare reduction' decl \p D, caching the pair in UDRMap. When called
/// during function codegen (\p CGF non-null), the decl is also recorded
/// against the current function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is passed as an expression; otherwise the
    // helper falls back to the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1191 
1192 std::pair<llvm::Function *, llvm::Function *>
1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194   auto I = UDRMap.find(D);
1195   if (I != UDRMap.end())
1196     return I->second;
1197   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198   return UDRMap.lookup(D);
1199 }
1200 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// On construction, push a finalization callback onto the OpenMPIRBuilder
  /// (if one is in use) so cancellation inside the region branches through
  /// clang's cleanup machinery; the callback is popped on destruction.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; both ctor and dtor are no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1245 
1246 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1247     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1248     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1249     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1250   assert(ThreadIDVar->getType()->isPointerType() &&
1251          "thread id variable must be of type kmp_int32 *");
1252   CodeGenFunction CGF(CGM, true);
1253   bool HasCancel = false;
1254   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1255     HasCancel = OPD->hasCancel();
1256   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1257     HasCancel = OPD->hasCancel();
1258   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1259     HasCancel = OPSD->hasCancel();
1260   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1261     HasCancel = OPFD->hasCancel();
1262   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1263     HasCancel = OPFD->hasCancel();
1264   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD =
1267                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272 
1273   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1274   //       parallel region to make cancellation barriers work properly.
1275   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1276   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1277   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1278                                     HasCancel, OutlinedHelperName);
1279   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1280   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1281 }
1282 
/// Emit the outlined function for an OpenMP 'parallel' region; the captured
/// statement registered for OMPD_parallel supplies the region body.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1290 
/// Emit the outlined function for an OpenMP 'teams' region; the captured
/// statement registered for OMPD_teams supplies the region body.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1298 
/// Emit the outlined function for a task or taskloop region. For untied tasks
/// an action is installed that re-enqueues the task via __kmpc_omp_task, and
/// the number of generated task parts is reported through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used at untied scheduling points: re-submit the task descriptor
  // loaded from TaskTVar to the runtime.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether this task region may be cancelled.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345 
1346 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347                              const RecordDecl *RD, const CGRecordLayout &RL,
1348                              ArrayRef<llvm::Constant *> Data) {
1349   llvm::StructType *StructTy = RL.getLLVMType();
1350   unsigned PrevIdx = 0;
1351   ConstantInitBuilder CIBuilder(CGM);
1352   auto DI = Data.begin();
1353   for (const FieldDecl *FD : RD->fields()) {
1354     unsigned Idx = RL.getLLVMFieldNo(FD);
1355     // Fill the alignment.
1356     for (unsigned I = PrevIdx; I < Idx; ++I)
1357       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358     PrevIdx = Idx + 1;
1359     Fields.add(*DI);
1360     ++DI;
1361   }
1362 }
1363 
/// Create a global variable of record type \p Ty whose initializer is built
/// field-by-field from \p Data (padding zero-filled by buildStructValue).
/// Trailing arguments \p Args are forwarded to
/// ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1378 
/// Build a constant struct of record type \p Ty from \p Data (padding
/// zero-filled by buildStructValue) and append it to the aggregate builder
/// \p Parent instead of creating a standalone global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1390 
/// Create the service insert point for the current function: a dummy
/// instruction ("svcpt", a dead bitcast of undef) marking where runtime
/// support values (location/thread id) may be inserted — either at the
/// current builder position or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1406 
1407 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409   if (Elem.second.ServiceInsertPt) {
1410     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411     Elem.second.ServiceInsertPt = nullptr;
1412     Ptr->eraseFromParent();
1413   }
1414 }
1415 
/// Render the ident_t location string ";file;function;line;column;;" for
/// \p Loc into \p Buffer and return a StringRef over it (the function part is
/// empty when there is no current FunctionDecl).
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1428 
1429 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430                                                  SourceLocation Loc,
1431                                                  unsigned Flags) {
1432   uint32_t SrcLocStrSize;
1433   llvm::Constant *SrcLocStr;
1434   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435       Loc.isInvalid()) {
1436     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437   } else {
1438     std::string FunctionName;
1439     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440       FunctionName = FD->getQualifiedNameAsString();
1441     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442     const char *FileName = PLoc.getFilename();
1443     unsigned Line = PLoc.getLine();
1444     unsigned Column = PLoc.getColumn();
1445     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446                                                 Column, SrcLocStrSize);
1447   }
1448   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449   return OMPBuilder.getOrCreateIdent(
1450       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451 }
1452 
/// Return the OpenMP thread id for the current function, emitting a runtime
/// call to __kmpc_global_thread_num only when it cannot be obtained from a
/// cached value or an outlined region's thread-id parameter.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Reading the parameter is only safe when no EH landing pad can bypass
      // its materialization, or when we are emitting into the entry block or
      // the block that defines the parameter value.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point (entry block) so the cached
  // value dominates all uses; restore the builder position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1521 
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for(const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542 
/// Return the LLVM type of an ident_t* (pointer to the source-location
/// descriptor passed to kmpc runtime entry points).
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1546 
1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548   if (!Kmpc_MicroTy) {
1549     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553   }
1554   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555 }
1556 
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559                                              bool IsGPUDistribute) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name;
1563   if (IsGPUDistribute)
1564     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565                                     : "__kmpc_distribute_static_init_4u")
1566                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1567                                     : "__kmpc_distribute_static_init_8u");
1568   else
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570                                     : "__kmpc_for_static_init_4u")
1571                         : (IVSigned ? "__kmpc_for_static_init_8"
1572                                     : "__kmpc_for_static_init_8u");
1573 
1574   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576   llvm::Type *TypeParams[] = {
1577     getIdentTyPointerTy(),                     // loc
1578     CGM.Int32Ty,                               // tid
1579     CGM.Int32Ty,                               // schedtype
1580     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581     PtrTy,                                     // p_lower
1582     PtrTy,                                     // p_upper
1583     PtrTy,                                     // p_stride
1584     ITy,                                       // incr
1585     ITy                                        // chunk
1586   };
1587   auto *FnTy =
1588       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
1592 llvm::FunctionCallee
1593 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594   assert((IVSize == 32 || IVSize == 64) &&
1595          "IV size is not compatible with the omp runtime");
1596   StringRef Name =
1597       IVSize == 32
1598           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602                                CGM.Int32Ty,           // tid
1603                                CGM.Int32Ty,           // schedtype
1604                                ITy,                   // lower
1605                                ITy,                   // upper
1606                                ITy,                   // stride
1607                                ITy                    // chunk
1608   };
1609   auto *FnTy =
1610       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611   return CGM.CreateRuntimeFunction(FnTy, Name);
1612 }
1613 
1614 llvm::FunctionCallee
1615 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616   assert((IVSize == 32 || IVSize == 64) &&
1617          "IV size is not compatible with the omp runtime");
1618   StringRef Name =
1619       IVSize == 32
1620           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622   llvm::Type *TypeParams[] = {
1623       getIdentTyPointerTy(), // loc
1624       CGM.Int32Ty,           // tid
1625   };
1626   auto *FnTy =
1627       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628   return CGM.CreateRuntimeFunction(FnTy, Name);
1629 }
1630 
1631 llvm::FunctionCallee
1632 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633   assert((IVSize == 32 || IVSize == 64) &&
1634          "IV size is not compatible with the omp runtime");
1635   StringRef Name =
1636       IVSize == 32
1637           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641   llvm::Type *TypeParams[] = {
1642     getIdentTyPointerTy(),                     // loc
1643     CGM.Int32Ty,                               // tid
1644     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645     PtrTy,                                     // p_lower
1646     PtrTy,                                     // p_upper
1647     PtrTy                                      // p_stride
1648   };
1649   auto *FnTy =
1650       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651   return CGM.CreateRuntimeFunction(FnTy, Name);
1652 }
1653 
1654 /// Obtain information that uniquely identifies a target entry. This
1655 /// consists of the file and device IDs as well as line number associated with
1656 /// the relevant entry source location.
1657 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658                                      unsigned &DeviceID, unsigned &FileID,
1659                                      unsigned &LineNum) {
1660   SourceManager &SM = C.getSourceManager();
1661 
1662   // The loc should be always valid and have a file ID (the user cannot use
1663   // #pragma directives in macros)
1664 
1665   assert(Loc.isValid() && "Source location is expected to be always valid.");
1666 
1667   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669 
1670   llvm::sys::fs::UniqueID ID;
1671   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676           << PLoc.getFilename() << EC.message();
1677   }
1678 
1679   DeviceID = ID.getDevice();
1680   FileID = ID.getFile();
1681   LineNum = PLoc.getLine();
1682 }
1683 
/// Return the address through which a 'declare target' variable must be
/// accessed, which for 'link' variables (and 'to' variables under unified
/// shared memory) is an indirection pointer named "<var>_decl_tgt_ref_ptr".
/// Returns an invalid Address when no indirection is required.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device code is generated.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Disambiguate internal-linkage variables across translation units
        // by appending the file ID of the declaration's location.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Lazily create the reference pointer. On the host it is initialized
      // with the address of the original variable; on the device the runtime
      // fills it in.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1722 
1723 llvm::Constant *
1724 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726          !CGM.getContext().getTargetInfo().isTLSSupported());
1727   // Lookup the entry, lazily creating it if necessary.
1728   std::string Suffix = getName({"cache", ""});
1729   return getOrCreateInternalVariable(
1730       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731 }
1732 
1733 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734                                                 const VarDecl *VD,
1735                                                 Address VDAddr,
1736                                                 SourceLocation Loc) {
1737   if (CGM.getLangOpts().OpenMPUseTLS &&
1738       CGM.getContext().getTargetInfo().isTLSSupported())
1739     return VDAddr;
1740 
1741   llvm::Type *VarTy = VDAddr.getElementType();
1742   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744                                                        CGM.Int8PtrTy),
1745                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746                          getOrCreateThreadPrivateCache(VD)};
1747   return Address(CGF.EmitRuntimeCall(
1748                      OMPBuilder.getOrCreateRuntimeFunction(
1749                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1750                      Args),
1751                  VDAddr.getAlignment());
1752 }
1753 
1754 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758   // library.
1759   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1760   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1761                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1762                       OMPLoc);
1763   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1764   // to register constructor/destructor for variable.
1765   llvm::Value *Args[] = {
1766       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1767       Ctor, CopyCtor, Dtor};
1768   CGF.EmitRuntimeCall(
1769       OMPBuilder.getOrCreateRuntimeFunction(
1770           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1771       Args);
1772 }
1773 
/// Emit (at most once per variable) the ctor/dtor helper functions for a
/// threadprivate variable and register them with the runtime. Returns the
/// generated initialization function when \p CGF is null (registration must
/// happen from a global initializer), otherwise emits the registration into
/// \p CGF and returns nullptr. Also returns nullptr when TLS is used or no
/// ctor/dtor is needed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the helpers twice.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of this thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the ctor to return the (initialized) address.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of this thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed to the runtime as typed null
    // function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated global
      // initialization function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1893 
/// Emit and register offload-entry ctor/dtor helpers for a 'declare target'
/// global variable. Returns true when the variable must NOT be emitted by
/// common codegen (i.e. when compiling for the device), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without device targets (host-only compilation).
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism instead.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the helpers only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; only the offload entry references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed so the entry has an
      // address to refer to.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; only the offload entry references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global for the entry's address.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008 
/// Return the address of an "artificial" (compiler-generated) threadprivate
/// variable identified by \p Name. Uses a TLS global when available,
/// otherwise falls back to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Backing global named "<Name>.artificial.".
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the global thread-local and use it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(&loc, tid, &var, size, &cache)
  // with a companion "<Name>.artificial..cache." global.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2040 
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  // Emits "if (Cond) { ThenGen } else { ElseGen }". Used to select between
  // the parallel and serialized code paths of an OpenMP 'if' clause.
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2079 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  // Emits the call that launches a 'parallel' region: either a direct
  // __kmpc_fork_call, or (under an 'if' clause) a runtime-selected choice
  // between forking and running the outlined function serialized on the
  // current thread.
  // NOTE(review): NumThreads is not referenced on this path — presumably it
  // is consumed before this call (e.g. via a num_threads runtime call);
  // confirm against callers.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Serialized fallback: run the outlined function on the current thread,
    // bracketed by __kmpc_serialized_parallel/__kmpc_end_serialized_parallel.
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause the choice is made at runtime via emitIfClause;
  // otherwise the fork path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2151 
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed in a first argument of the outlined function
2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155 // regular serial code region, get thread ID by calling kmp_int32
2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157 // return the address of that temp.
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159                                              SourceLocation Loc) {
2160   if (auto *OMPRegionInfo =
2161           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162     if (OMPRegionInfo->getThreadIDVariable())
2163       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164 
2165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166   QualType Int32Ty =
2167       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169   CGF.EmitStoreOfScalar(ThreadID,
2170                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171 
2172   return ThreadIDTemp;
2173 }
2174 
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Returns (creating on first request) a module-internal global of type Ty,
  // uniquely identified by Name and cached in InternalVars.
  // Render the (possibly multi-part) Twine into stable local storage so it
  // can be used as the map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either inserts a null placeholder or finds the cached entry.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request: create a common-linkage, zero-initialized global whose
  // name is the map key itself (Elem.first() outlives the local Buffer).
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2194 
2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197   std::string Name = getName({Prefix, "var"});
2198   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199 }
2200 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// On Enter, calls EnterCallee(EnterArgs); when \p Conditional is set, the
/// enclosed region is additionally guarded on that call returning nonzero
/// (callers of conditional actions must invoke Done() afterwards to emit the
/// continuation block). On Exit, calls ExitCallee(ExitArgs).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  // NOTE: ArrayRef members are non-owning views; the argument arrays must
  // outlive this action (callers build them as local arrays on the stack).
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Guard the region: only execute it when the enter call returned
      // nonzero (e.g. __kmpc_master / __kmpc_single electing this thread).
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Closes the guard opened by a conditional Enter; must be called once the
  // guarded region has been emitted.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2239 
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes an extra trailing argument when a 'hint' clause is
  // present; the end call never does, so it reuses the base Args.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2269 
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: only the thread for which __kmpc_master returns
  // nonzero executes the region body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional guard opened by the action's Enter.
  Action.Done(CGF);
}
2292 
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // Without a 'filter' clause the filter thread defaults to 0 (the primary
  // thread), matching 'master' semantics.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument, hence the separate ArgsEnd.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional guard opened by the action's Enter.
  Action.Done(CGF);
}
2321 
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Under -fopenmp-enable-irbuilder, delegate to the OpenMPIRBuilder;
  // otherwise emit the runtime call directly.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // A taskyield is a scheduling point for untied tasks.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2341 
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Unconditional action: every thread enters and leaves the taskgroup.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
// Builds the helper function passed to __kmpc_copyprivate that copies each
// copyprivate variable from the single-executing thread's storage (RHS) into
// every other thread's storage (LHS), using the AST-provided assignment ops.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: entry I of Src/Dst/AssignmentOps belongs to
  // copyprivate variable I.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether this thread was the one that executed the single
  // region; it is only needed when copyprivate data must be broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the guarded region here, so only the executing thread
    // stores 1.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Out-parameters ScheduleKind/ChunkExpr are only written for the doacross
  // case below; otherwise they are left untouched.
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    // Synthesize an unsigned 32-bit literal '1' as the chunk expression.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2574 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  // Inside a cancellable region the barrier itself is a cancellation point,
  // so use the cancel_barrier entry and (optionally) branch out of the
  // construct when it reports a pending cancellation.
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable path: a plain __kmpc_barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2624 
2625 /// Map the OpenMP loop schedule to the runtime enumeration.
2626 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2627                                           bool Chunked, bool Ordered) {
2628   switch (ScheduleKind) {
2629   case OMPC_SCHEDULE_static:
2630     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2631                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2632   case OMPC_SCHEDULE_dynamic:
2633     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2634   case OMPC_SCHEDULE_guided:
2635     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2636   case OMPC_SCHEDULE_runtime:
2637     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2638   case OMPC_SCHEDULE_auto:
2639     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2640   case OMPC_SCHEDULE_unknown:
2641     assert(!Chunked && "chunk was specified but schedule kind not known");
2642     return Ordered ? OMP_ord_static : OMP_sch_static;
2643   }
2644   llvm_unreachable("Unexpected runtime schedule");
2645 }
2646 
2647 /// Map the OpenMP distribute schedule to the runtime enumeration.
2648 static OpenMPSchedType
2649 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2650   // only static is allowed for dist_schedule
2651   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2652 }
2653 
2654 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2655                                          bool Chunked) const {
2656   OpenMPSchedType Schedule =
2657       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2658   return Schedule == OMP_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticNonchunked(
2662     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2663   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2664   return Schedule == OMP_dist_sch_static;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2668                                       bool Chunked) const {
2669   OpenMPSchedType Schedule =
2670       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2671   return Schedule == OMP_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isStaticChunked(
2675     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2676   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2677   return Schedule == OMP_dist_sch_static_chunked;
2678 }
2679 
2680 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2681   OpenMPSchedType Schedule =
2682       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2683   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2684   return Schedule != OMP_sch_static;
2685 }
2686 
2687 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2688                                   OpenMPScheduleClauseModifier M1,
2689                                   OpenMPScheduleClauseModifier M2) {
2690   int Modifier = 0;
2691   switch (M1) {
2692   case OMPC_SCHEDULE_MODIFIER_monotonic:
2693     Modifier = OMP_sch_modifier_monotonic;
2694     break;
2695   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2696     Modifier = OMP_sch_modifier_nonmonotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_simd:
2699     if (Schedule == OMP_sch_static_chunked)
2700       Schedule = OMP_sch_static_balanced_chunked;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_last:
2703   case OMPC_SCHEDULE_MODIFIER_unknown:
2704     break;
2705   }
2706   switch (M2) {
2707   case OMPC_SCHEDULE_MODIFIER_monotonic:
2708     Modifier = OMP_sch_modifier_monotonic;
2709     break;
2710   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2711     Modifier = OMP_sch_modifier_nonmonotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_simd:
2714     if (Schedule == OMP_sch_static_chunked)
2715       Schedule = OMP_sch_static_balanced_chunked;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_last:
2718   case OMPC_SCHEDULE_MODIFIER_unknown:
2719     break;
2720   }
2721   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2722   // If the static schedule kind is specified or if the ordered clause is
2723   // specified, and if the nonmonotonic modifier is not specified, the effect is
2724   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2725   // modifier is specified, the effect is as if the nonmonotonic modifier is
2726   // specified.
2727   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2728     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2729           Schedule == OMP_sch_static_balanced_chunked ||
2730           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2731           Schedule == OMP_dist_sch_static_chunked ||
2732           Schedule == OMP_dist_sch_static))
2733       Modifier = OMP_sch_modifier_nonmonotonic;
2734   }
2735   return Schedule | Modifier;
2736 }
2737 
2738 void CGOpenMPRuntime::emitForDispatchInit(
2739     CodeGenFunction &CGF, SourceLocation Loc,
2740     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2741     bool Ordered, const DispatchRTInput &DispatchValues) {
2742   if (!CGF.HaveInsertPoint())
2743     return;
2744   OpenMPSchedType Schedule = getRuntimeSchedule(
2745       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2746   assert(Ordered ||
2747          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2748           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2749           Schedule != OMP_sch_static_balanced_chunked));
2750   // Call __kmpc_dispatch_init(
2751   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2752   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2753   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2754 
2755   // If the Chunk was not specified in the clause - use default value 1.
2756   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2757                                             : CGF.Builder.getIntN(IVSize, 1);
2758   llvm::Value *Args[] = {
2759       emitUpdateLocation(CGF, Loc),
2760       getThreadID(CGF, Loc),
2761       CGF.Builder.getInt32(addMonoNonMonoModifier(
2762           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2763       DispatchValues.LB,                                     // Lower
2764       DispatchValues.UB,                                     // Upper
2765       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2766       Chunk                                                  // Chunk
2767   };
2768   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2769 }
2770 
/// Emit a call to the given __kmpc_for_static_init_* runtime entry point for
/// a statically scheduled worksharing region.
///
/// \param UpdateLocation The ident_t* source-location descriptor.
/// \param ThreadId The global thread id value.
/// \param ForStaticInitFunction The concrete runtime function to call,
///        selected by the caller (by IV width/signedness and target).
/// \param Schedule Must be one of the static schedule types (asserted below);
///        dynamic schedules are handled by emitForDispatchInit instead.
/// \param M1, M2 Schedule modifiers from the schedule clause.
/// \param Values Holds the chunk expression (may be null) and the addresses
///        of the is-last-iteration/lower/upper/stride locals the runtime
///        fills in.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked schedule types.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2819 
2820 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2821                                         SourceLocation Loc,
2822                                         OpenMPDirectiveKind DKind,
2823                                         const OpenMPScheduleTy &ScheduleKind,
2824                                         const StaticRTInput &Values) {
2825   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2826       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2827   assert(isOpenMPWorksharingDirective(DKind) &&
2828          "Expected loop-based or sections-based directive.");
2829   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2830                                              isOpenMPLoopDirective(DKind)
2831                                                  ? OMP_IDENT_WORK_LOOP
2832                                                  : OMP_IDENT_WORK_SECTIONS);
2833   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2834   llvm::FunctionCallee StaticInitFunction =
2835       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2836   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2838                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2839 }
2840 
2841 void CGOpenMPRuntime::emitDistributeStaticInit(
2842     CodeGenFunction &CGF, SourceLocation Loc,
2843     OpenMPDistScheduleClauseKind SchedKind,
2844     const CGOpenMPRuntime::StaticRTInput &Values) {
2845   OpenMPSchedType ScheduleNum =
2846       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2847   llvm::Value *UpdatedLocation =
2848       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2849   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2850   llvm::FunctionCallee StaticInitFunction;
2851   bool isGPUDistribute =
2852       CGM.getLangOpts().OpenMPIsDevice &&
2853       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2854   StaticInitFunction = createForStaticInitFunction(
2855       Values.IVSize, Values.IVSigned, isGPUDistribute);
2856 
2857   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2858                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2859                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2860 }
2861 
2862 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863                                           SourceLocation Loc,
2864                                           OpenMPDirectiveKind DKind) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc,
2870                          isOpenMPDistributeDirective(DKind)
2871                              ? OMP_IDENT_WORK_DISTRIBUTE
2872                              : isOpenMPLoopDirective(DKind)
2873                                    ? OMP_IDENT_WORK_LOOP
2874                                    : OMP_IDENT_WORK_SECTIONS),
2875       getThreadID(CGF, Loc)};
2876   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879     CGF.EmitRuntimeCall(
2880         OMPBuilder.getOrCreateRuntimeFunction(
2881             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882         Args);
2883   else
2884     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886                         Args);
2887 }
2888 
2889 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890                                                  SourceLocation Loc,
2891                                                  unsigned IVSize,
2892                                                  bool IVSigned) {
2893   if (!CGF.HaveInsertPoint())
2894     return;
2895   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898 }
2899 
2900 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901                                           SourceLocation Loc, unsigned IVSize,
2902                                           bool IVSigned, Address IL,
2903                                           Address LB, Address UB,
2904                                           Address ST) {
2905   // Call __kmpc_dispatch_next(
2906   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908   //          kmp_int[32|64] *p_stride);
2909   llvm::Value *Args[] = {
2910       emitUpdateLocation(CGF, Loc),
2911       getThreadID(CGF, Loc),
2912       IL.getPointer(), // &isLastIter
2913       LB.getPointer(), // &Lower
2914       UB.getPointer(), // &Upper
2915       ST.getPointer()  // &Stride
2916   };
2917   llvm::Value *Call =
2918       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919   return CGF.EmitScalarConversion(
2920       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921       CGF.getContext().BoolTy, Loc);
2922 }
2923 
2924 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925                                            llvm::Value *NumThreads,
2926                                            SourceLocation Loc) {
2927   if (!CGF.HaveInsertPoint())
2928     return;
2929   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930   llvm::Value *Args[] = {
2931       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935                       Args);
2936 }
2937 
2938 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939                                          ProcBindKind ProcBind,
2940                                          SourceLocation Loc) {
2941   if (!CGF.HaveInsertPoint())
2942     return;
2943   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2944   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945   llvm::Value *Args[] = {
2946       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950                       Args);
2951 }
2952 
2953 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956     OMPBuilder.createFlush(CGF.Builder);
2957   } else {
2958     if (!CGF.HaveInsertPoint())
2959       return;
2960     // Build call void __kmpc_flush(ident_t *loc)
2961     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962                             CGM.getModule(), OMPRTL___kmpc_flush),
2963                         emitUpdateLocation(CGF, Loc));
2964   }
2965 }
2966 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these enumerators are used as field indices, so their order
/// is assumed to match the kmp_task_t record layout built by the task codegen
/// elsewhere in this file — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2992 
2993 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994   return OffloadEntriesTargetRegion.empty() &&
2995          OffloadEntriesDeviceGlobalVar.empty();
2996 }
2997 
2998 /// Initialize target region entry.
2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001                                     StringRef ParentName, unsigned LineNum,
3002                                     unsigned Order) {
3003   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004                                              "only required for the device "
3005                                              "code generation.");
3006   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008                                    OMPTargetRegionEntryTargetRegion);
3009   ++OffloadingEntriesNum;
3010 }
3011 
/// Register a target region entry, attaching its address, ID and flags.
/// On the device, the entry must already have been initialized from the host
/// IR metadata; on the host, a brand-new entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Skip duplicate registration of an already-registered target region
    // (IgnoreAddressId makes hasTargetRegionEntryInfo accept entries that
    // already carry an address/ID).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3041 
3042 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3043     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3044     bool IgnoreAddressId) const {
3045   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3046   if (PerDevice == OffloadEntriesTargetRegion.end())
3047     return false;
3048   auto PerFile = PerDevice->second.find(FileID);
3049   if (PerFile == PerDevice->second.end())
3050     return false;
3051   auto PerParentName = PerFile->second.find(ParentName);
3052   if (PerParentName == PerFile->second.end())
3053     return false;
3054   auto PerLine = PerParentName->second.find(LineNum);
3055   if (PerLine == PerParentName->second.end())
3056     return false;
3057   // Fail if this entry is already registered.
3058   if (!IgnoreAddressId &&
3059       (PerLine->second.getAddress() || PerLine->second.getID()))
3060     return false;
3061   return true;
3062 }
3063 
3064 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3065     const OffloadTargetRegionEntryInfoActTy &Action) {
3066   // Scan all target region entries and perform the provided action.
3067   for (const auto &D : OffloadEntriesTargetRegion)
3068     for (const auto &F : D.second)
3069       for (const auto &P : F.second)
3070         for (const auto &L : P.second)
3071           Action(D.first, F.first, P.first(), L.first, L.second);
3072 }
3073 
3074 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3075     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3076                                        OMPTargetGlobalVarEntryKind Flags,
3077                                        unsigned Order) {
3078   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3079                                              "only required for the device "
3080                                              "code generation.");
3081   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3082   ++OffloadingEntriesNum;
3083 }
3084 
3085 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3087                                      CharUnits VarSize,
3088                                      OMPTargetGlobalVarEntryKind Flags,
3089                                      llvm::GlobalValue::LinkageTypes Linkage) {
3090   if (CGM.getLangOpts().OpenMPIsDevice) {
3091     // This could happen if the device compilation is invoked standalone.
3092     if (!hasDeviceGlobalVarEntryInfo(VarName))
3093       return;
3094     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096       if (Entry.getVarSize().isZero()) {
3097         Entry.setVarSize(VarSize);
3098         Entry.setLinkage(Linkage);
3099       }
3100       return;
3101     }
3102     Entry.setVarSize(VarSize);
3103     Entry.setLinkage(Linkage);
3104     Entry.setAddress(Addr);
3105   } else {
3106     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3109              "Entry not initialized!");
3110       if (Entry.getVarSize().isZero()) {
3111         Entry.setVarSize(VarSize);
3112         Entry.setLinkage(Linkage);
3113       }
3114       return;
3115     }
3116     OffloadEntriesDeviceGlobalVar.try_emplace(
3117         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118     ++OffloadingEntriesNum;
3119   }
3120 }
3121 
3122 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3123     actOnDeviceGlobalVarEntriesInfo(
3124         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3125   // Scan all target region entries and perform the provided action.
3126   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3127     Action(E.getKey(), E.getValue());
3128 }
3129 
/// Create a global __tgt_offload_entry structure describing a single offload
/// entry (target region or declare-target global) and place it in the
/// "omp_offloading_entries" section where the linker/runtime expect it.
/// NOTE(review): the Linkage parameter is unused here — entries are always
/// emitted with WeakAnyLinkage; confirm whether that is intentional.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Initializer fields of __tgt_offload_entry: addr, name, size, flags,
  // reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3160 
/// Emit the __tgt_offload_entry globals for all registered entries and the
/// "omp_offload.info" named metadata node the device compilation reads back
/// via loadOffloadInfoMetadata(). Also diagnoses entries whose address or ID
/// was never filled in.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are placed at the index given by their registration order so the
  // device pass can reproduce the same numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by looking up the file
        // with the matching device/file unique ID in the source manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit one __tgt_offload_entry per ordered entry, diagnosing entries that
  // never received an address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are not emitted on the device under unified shared
        // memory.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3334 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device pass consumes host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do if no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR bitcode into a throwaway context; we only need its
  // named metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand is one entry; operand 0 encodes the entry kind and selects
  // how the remaining operands are interpreted (see the emitter above).
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3403 
3404 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405   if (!KmpRoutineEntryPtrTy) {
3406     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407     ASTContext &C = CGM.getContext();
3408     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409     FunctionProtoType::ExtProtoInfo EPI;
3410     KmpRoutineEntryPtrQTy = C.getPointerType(
3411         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413   }
3414 }
3415 
3416 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417   // Make sure the type of the entry is already created. This is the type we
3418   // have to create:
3419   // struct __tgt_offload_entry{
3420   //   void      *addr;       // Pointer to the offload entry info.
3421   //                          // (function or global)
3422   //   char      *name;       // Name of the function or global.
3423   //   size_t     size;       // Size of the entry info (0 if it a function).
3424   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3425   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3426   // };
3427   if (TgtOffloadEntryQTy.isNull()) {
3428     ASTContext &C = CGM.getContext();
3429     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3430     RD->startDefinition();
3431     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3432     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3433     addFieldToRecordDecl(C, RD, C.getSizeType());
3434     addFieldToRecordDecl(
3435         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436     addFieldToRecordDecl(
3437         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438     RD->completeDefinition();
3439     RD->addAttr(PackedAttr::CreateImplicit(C));
3440     TgtOffloadEntryQTy = C.getRecordType(RD);
3441   }
3442   return TgtOffloadEntryQTy;
3443 }
3444 
namespace {
/// Bundles the declarations involved in privatizing a single variable for a
/// task-based directive.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor for private locals: only the original declaration is tracked,
  // leaving the other members null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression to the captured variable (null for private locals).
  const Expr *OriginalRef = nullptr;
  // The variable being privatized.
  const VarDecl *Original = nullptr;
  // The task-private copy of the variable (null for private locals).
  const VarDecl *PrivateCopy = nullptr;
  // Helper decl used when initializing elements of a private array copy
  // (null for private locals).
  const VarDecl *PrivateElemInit = nullptr;
  // True when this entry was built with the single-argument constructor, i.e.
  // it describes a private local rather than a clause-privatized capture.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment paired with the privatization info for one variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3462 
3463 static bool isAllocatableDecl(const VarDecl *VD) {
3464   const VarDecl *CVD = VD->getCanonicalDecl();
3465   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3466     return false;
3467   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3468   // Use the default allocation.
3469   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3470            !AA->getAllocator());
3471 }
3472 
3473 static RecordDecl *
3474 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3475   if (!Privates.empty()) {
3476     ASTContext &C = CGM.getContext();
3477     // Build struct .kmp_privates_t. {
3478     //         /*  private vars  */
3479     //       };
3480     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3481     RD->startDefinition();
3482     for (const auto &Pair : Privates) {
3483       const VarDecl *VD = Pair.second.Original;
3484       QualType Type = VD->getType().getNonReferenceType();
3485       // If the private variable is a local variable with lvalue ref type,
3486       // allocate the pointer instead of the pointee type.
3487       if (Pair.second.isLocalPrivate()) {
3488         if (VD->getType()->isLValueReferenceType())
3489           Type = C.getPointerType(Type);
3490         if (isAllocatableDecl(VD))
3491           Type = C.getPointerType(Type);
3492       }
3493       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3494       if (VD->hasAttrs()) {
3495         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3496              E(VD->getAttrs().end());
3497              I != E; ++I)
3498           FD->addAttr(*I);
3499       }
3500     }
3501     RD->completeDefinition();
3502     return RD;
3503   }
3504   return nullptr;
3505 }
3506 
/// Build the implicit "kmp_task_t" record. The field order is significant:
/// fields are later accessed positionally via the KmpTaskT* indices (e.g.
/// KmpTaskTPartId, KmpTaskTShareds, KmpTaskTLowerBound) in
/// emitProxyTaskFunction and emitTaskDupFunction below.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  // Taskloop directives carry extra bounds/stride/last-iteration/reduction
  // fields after the common header.
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
3552 
3553 static RecordDecl *
3554 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3555                                      ArrayRef<PrivateDataTy> Privates) {
3556   ASTContext &C = CGM.getContext();
3557   // Build struct kmp_task_t_with_privates {
3558   //         kmp_task_t task_data;
3559   //         .kmp_privates_t. privates;
3560   //       };
3561   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3562   RD->startDefinition();
3563   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3564   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3565     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3566   RD->completeDefinition();
3567   return RD;
3568 }
3569 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Arguments: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates; Base at its first
  // field, the kmp_task_t header.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when something was privatized;
  // otherwise pass a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions loaded from
  // the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3684 
/// Emit the task destructor thunk: a function with the task-entry signature
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) that pushes a destructor
/// cleanup for every privatized field whose type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the kmp_task_t header (field 0); the second field is the privates
  // record whose members may need destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3733 
3734 /// Emit a privates mapping function for correct handling of private and
3735 /// firstprivate variables.
3736 /// \code
3737 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3738 /// **noalias priv1,...,  <tyn> **noalias privn) {
3739 ///   *priv1 = &.privates.priv1;
3740 ///   ...;
3741 ///   *privn = &.privates.privn;
3742 /// }
3743 /// \endcode
3744 static llvm::Value *
3745 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3746                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3747                                ArrayRef<PrivateDataTy> Privates) {
3748   ASTContext &C = CGM.getContext();
3749   FunctionArgList Args;
3750   ImplicitParamDecl TaskPrivatesArg(
3751       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3753       ImplicitParamDecl::Other);
3754   Args.push_back(&TaskPrivatesArg);
3755   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3756   unsigned Counter = 1;
3757   for (const Expr *E : Data.PrivateVars) {
3758     Args.push_back(ImplicitParamDecl::Create(
3759         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760         C.getPointerType(C.getPointerType(E->getType()))
3761             .withConst()
3762             .withRestrict(),
3763         ImplicitParamDecl::Other));
3764     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765     PrivateVarsPos[VD] = Counter;
3766     ++Counter;
3767   }
3768   for (const Expr *E : Data.FirstprivateVars) {
3769     Args.push_back(ImplicitParamDecl::Create(
3770         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3771         C.getPointerType(C.getPointerType(E->getType()))
3772             .withConst()
3773             .withRestrict(),
3774         ImplicitParamDecl::Other));
3775     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776     PrivateVarsPos[VD] = Counter;
3777     ++Counter;
3778   }
3779   for (const Expr *E : Data.LastprivateVars) {
3780     Args.push_back(ImplicitParamDecl::Create(
3781         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3782         C.getPointerType(C.getPointerType(E->getType()))
3783             .withConst()
3784             .withRestrict(),
3785         ImplicitParamDecl::Other));
3786     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787     PrivateVarsPos[VD] = Counter;
3788     ++Counter;
3789   }
3790   for (const VarDecl *VD : Data.PrivateLocals) {
3791     QualType Ty = VD->getType().getNonReferenceType();
3792     if (VD->getType()->isLValueReferenceType())
3793       Ty = C.getPointerType(Ty);
3794     if (isAllocatableDecl(VD))
3795       Ty = C.getPointerType(Ty);
3796     Args.push_back(ImplicitParamDecl::Create(
3797         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3798         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3799         ImplicitParamDecl::Other));
3800     PrivateVarsPos[VD] = Counter;
3801     ++Counter;
3802   }
3803   const auto &TaskPrivatesMapFnInfo =
3804       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3805   llvm::FunctionType *TaskPrivatesMapTy =
3806       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3807   std::string Name =
3808       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3809   auto *TaskPrivatesMap = llvm::Function::Create(
3810       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3811       &CGM.getModule());
3812   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3813                                     TaskPrivatesMapFnInfo);
3814   if (CGM.getLangOpts().Optimize) {
3815     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3816     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3817     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3818   }
3819   CodeGenFunction CGF(CGM);
3820   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3821                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3822 
3823   // *privi = &.privates.privi;
3824   LValue Base = CGF.EmitLoadOfPointerLValue(
3825       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3826       TaskPrivatesArg.getType()->castAs<PointerType>());
3827   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3828   Counter = 0;
3829   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3830     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3831     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3832     LValue RefLVal =
3833         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3834     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3835         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3836     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3837     ++Counter;
3838   }
3839   CGF.FinishFunction();
3840   return TaskPrivatesMap;
3841 }
3842 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's shareds block (source of
///        firstprivate values); may be invalid when no copy-in is needed.
/// \param TDBase LValue of the kmp_task_t_with_privates being initialized.
/// \param ForDup True when emitting the body of the task_dup function (for
///        taskloops), which changes which initializers must be (re)run and
///        where shared values are read from.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the ForDup (task_dup) case, only non-trivial constructor inits need
    // to be re-run; trivial ones were already handled by memcpy.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value out of the source task's shareds block,
          // re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: evaluate the initializer with the
          // element helper bound to the shared value's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private copy: just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3964 
3965 /// Check if duplication function is required for taskloops.
3966 static bool checkInitIsRequired(CodeGenFunction &CGF,
3967                                 ArrayRef<PrivateDataTy> Privates) {
3968   bool InitRequired = false;
3969   for (const PrivateDataTy &Pair : Privates) {
3970     if (Pair.second.isLocalPrivate())
3971       continue;
3972     const VarDecl *VD = Pair.second.PrivateCopy;
3973     const Expr *Init = VD->getAnyInitializer();
3974     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3975                                     !CGF.isTrivialInitializer(Init));
3976     if (InitRequired)
3977       break;
3978   }
3979   return InitRequired;
3980 }
3981 
3982 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Arguments: (task_dst, task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivate copies are read from the *source* task's shareds block.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4061 
4062 /// Checks if destructor function is required to be generated.
4063 /// \return true if cleanups are required, false otherwise.
4064 static bool
4065 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4066                          ArrayRef<PrivateDataTy> Privates) {
4067   for (const PrivateDataTy &P : Privates) {
4068     if (P.second.isLocalPrivate())
4069       continue;
4070     QualType Ty = P.second.Original->getType().getNonReferenceType();
4071     if (Ty.isDestructedType())
4072       return true;
4073   }
4074   return false;
4075 }
4076 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes each iterator variable (and its
/// internal counter) and emits the loop headers — one nested counted loop per
/// iterator — leaving the insertion point inside the innermost loop body.
/// The destructor then closes all the loops in reverse nesting order
/// (counter increment, back-branch, exit block). Code emitted between
/// construction and destruction therefore runs once per iteration point.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations; indexed parallel to the
  // iterators and consumed in reverse order by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes this scope a no-op.
    if (!E)
      return;
    // Evaluate all upper bounds first, then create private storage for each
    // iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the nested loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4155 
/// Returns the (address, size-in-bytes) pair for the object referenced by
/// expression \p E.
/// - For an OpenMP array-shaping expression, size = sizeof(pointee) times the
///   product of all shaping dimensions.
/// - For an array section, size = (address one past the upper-bound element)
///   minus the base address.
/// - Otherwise, size = sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // The shaped pointer itself is the base address.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Compute the address just past the last element of the section and
    // subtract the base address to get the byte length.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4191 
4192 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4193 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4194   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4195   if (KmpTaskAffinityInfoTy.isNull()) {
4196     RecordDecl *KmpAffinityInfoRD =
4197         C.buildImplicitRecord("kmp_task_affinity_info_t");
4198     KmpAffinityInfoRD->startDefinition();
4199     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4200     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4201     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4202     KmpAffinityInfoRD->completeDefinition();
4203     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4204   }
4205 }
4206 
/// Emits the runtime-facing setup for a task described by directive \p D:
/// collects and sorts the private copies, builds (or reuses) the kmp_task_t
/// record extended with a privates block, creates the proxy task entry,
/// allocates the task object via __kmpc_omp_task_alloc (or
/// __kmpc_omp_target_task_alloc when a 'nowait' clause is present), copies
/// shareds, initializes privates, and fills in destructor, priority,
/// affinity and detach-event data. Returns the handles the caller needs to
/// actually enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the init expression used to
  // copy-construct the private copy from the original.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Compiler-generated task-local variables; allocatable ones are stored as
  // pointers, hence the pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment; stable sort keeps declaration order among entries
  // with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record, cached separately from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates-mapping helper (or a null pointer if there are no
  // privates); its type must match the task entry's 4th parameter.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(expr) may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully constant case: a plain constant-sized array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator clauses need a runtime position counter, seeded past the
    // statically-filled prefix.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that have lastprivates or privates needing initialization
    // also get a task-duplication helper.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4594 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): the numeric values appear to mirror the runtime's
/// kmp_depend_info flag encoding (see kmp.h) — keep in sync with the runtime.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Must match the field order built by getDependTypes().
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4605 
4606 /// Translates internal dependency kind into the runtime kind.
4607 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4608   RTLDependenceKindTy DepKind;
4609   switch (K) {
4610   case OMPC_DEPEND_in:
4611     DepKind = DepIn;
4612     break;
4613   // Out and InOut dependencies must use the same code.
4614   case OMPC_DEPEND_out:
4615   case OMPC_DEPEND_inout:
4616     DepKind = DepInOut;
4617     break;
4618   case OMPC_DEPEND_mutexinoutset:
4619     DepKind = DepMutexInOutSet;
4620     break;
4621   case OMPC_DEPEND_source:
4622   case OMPC_DEPEND_sink:
4623   case OMPC_DEPEND_depobj:
4624   case OMPC_DEPEND_unknown:
4625     llvm_unreachable("Unknown task dependence type");
4626   }
4627   return DepKind;
4628 }
4629 
4630 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4631 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4632                            QualType &FlagsTy) {
4633   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4634   if (KmpDependInfoTy.isNull()) {
4635     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4636     KmpDependInfoRD->startDefinition();
4637     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4638     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4639     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4640     KmpDependInfoRD->completeDefinition();
4641     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4642   }
4643 }
4644 
/// Retrieves the kmp_depend_info array backing a 'depobj' object.
/// \returns the number of dependencies stored in the depobj and an lvalue for
/// the first kmp_depend_info element. The count lives in the base_addr field
/// of the element placed just before the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj holds a void* to the first kmp_depend_info element.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to the header record that precedes the array.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4671 
/// Stores one kmp_depend_info record into \p DependenciesArray for every
/// dependence expression of \p Data.
/// \p Pos is either a compile-time index (unsigned*, advanced in place) or a
/// runtime counter lvalue (LValue*, loaded/incremented per element) used when
/// a dependence iterator makes the element count dynamic. If Data has an
/// iterator expression, the stores are emitted inside the iterator loops.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the body in iterator loops when an iterator modifier is present
  // (a null expression makes the scope a no-op).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Address the target slot either by constant index or runtime counter.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position — in place for the static counter, via a
    // load/add/store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4728 
/// For each depobj dependency expression in \p Data, emits code that reads the
/// number of kmp_depend_info records stored in that depobj. The count lives in
/// the base_addr field of the record at index -1 (the extra slot reserved by
/// emitDepobjDependClause). Returns one size value per dependency expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // The depobj variable holds a void* to the first real record; reload it
      // and reinterpret it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to reach the hidden count slot.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a uintptr temp; the temps are loaded back only
      // after the iterator scope has been closed (loop below).
      // NOTE(review): the zero-store followed by load/add is equivalent to
      // storing NumDeps directly, and if an iterator modifier were present
      // the zero-store would reset the temp on every iteration instead of
      // accumulating — presumably depobj deps never carry iterators; confirm.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the per-expression counts outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4784 
/// Copies the kmp_depend_info records contained in every depobj dependency of
/// \p Data into \p DependenciesArray, starting at the runtime index held in
/// \p PosLVal. After each copy the position counter is advanced by the number
/// of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is kept in the base_addr field of the record at index -1
      // (the extra slot written by emitDepobjDependClause).
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte size = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4841 
/// Emits the combined kmp_depend_info array for all depend clauses of a task.
/// Records are emitted in three phases: (1) plain dependencies without
/// iterator modifiers at statically known indices, (2) iterator-modified
/// dependencies at a runtime-tracked index, (3) the contents of depobj
/// dependencies copied in last. Returns the element count (as i32, or null if
/// there are no dependencies) and the array address cast to void*.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable records: plain deps without iterator modifiers.
  // Depobj and iterator-modified deps contribute runtime-computed counts.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    // NOTE(review): the iterator expression is cast without
    // IgnoreParenImpCasts here, unlike in emitDependData — presumably it is
    // already stripped on this path; confirm.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        // Records for this clause = product of iterator trip counts times the
        // number of dependency expressions.
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: build a variable-length array of
    // kmp_depend_info sized by the computed element count.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: use a constant-sized stack array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Phase 1: plain dependencies at compile-time-known positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Phase 2: continue from Pos with a runtime counter.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // Phase 3: bulk-copy depobj record arrays after all regular records.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4965 
/// Emits the dependency array backing an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading record whose
/// base_addr field stores the element count (needed later by the update and
/// destroy constructs). Returns the address of the first real record, cast to
/// void*; the count slot sits at index -1 relative to it.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 count-slot) * sizeof(record).
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the array type directly (again +1 for count slot).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill in the records starting at index 1 (index 0 is the count slot);
  // iterator clauses need a runtime counter, otherwise a constant index.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the count slot, as expected by the other depobj
  // handling routines.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5048 
/// Emits the 'destroy' handling for an 'omp depobj' construct: frees the heap
/// storage allocated by emitDepobjDependClause. The depobj handle points one
/// record past the allocation start (the hidden count slot), so the pointer is
/// stepped back by one element before being passed to __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the stored void* handle and view it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the true allocation start (the count slot at index -1).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5075 
/// Emits the 'update' handling for an 'omp depobj' construct: walks every
/// kmp_depend_info record stored in the depobj and rewrites its flags field to
/// \p NewDepKind. The walk is a hand-built pointer loop with a PHI over the
/// current element; the body runs before the exit test, so it assumes the
/// depobj holds at least one record.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the element pointer: entry value from EntryBB, incremented value
  // from the back-edge added below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5122 
/// Emits the runtime call sequence for an OpenMP 'task' directive: allocates
/// and initializes the task object via emitTaskInit, then either enqueues it
/// (__kmpc_omp_task / __kmpc_omp_task_with_deps) or — when an if-clause
/// evaluates to false — executes it immediately inline, waiting on its
/// dependencies first.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependency list is produced: ndeps_noalias = 0, list = null.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch (no if-clause, or it evaluated to true): enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id reset to 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch (if-clause evaluated to false): execute the task body inline
  // as an undeferred task.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5240 
/// Emits the runtime call sequence for an OpenMP 'taskloop' directive:
/// initializes the task object, stores the loop bounds/stride and the
/// reductions pointer into it, then calls __kmpc_taskloop. Unlike emitTaskCall
/// the if-clause is passed to the runtime as the if_val argument rather than
/// generating two code paths.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // object from the precomputed loop-bound variables of the directive.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No task reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument: Data.Schedule.getInt() distinguishes
  // num_tasks from grainsize when a schedule value is present.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5326 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen for every element (used by the atomic-reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // iterations; the back-edge incoming values are added after the body below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation acts on one element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the PHI loop. Use the current insert block (not BodyBB): emitting
  // the reduction body may have created additional blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5409 
5410 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5411 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5412 /// UDR combiner function.
5413 static void emitReductionCombiner(CodeGenFunction &CGF,
5414                                   const Expr *ReductionOp) {
5415   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5416     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5417       if (const auto *DRE =
5418               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5419         if (const auto *DRD =
5420                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5421           std::pair<llvm::Function *, llvm::Function *> Reduction =
5422               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5423           RValue Func = RValue::get(Reduction.first);
5424           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5425           CGF.EmitIgnoredExpr(ReductionOp);
5426           return;
5427         }
5428   CGF.EmitIgnoredExpr(ReductionOp);
5429 }
5430 
/// Emits the element-wise reduction function
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
/// where both arguments point to arrays of void* reduction-item addresses
/// of type \p ArgsType (with extra slots holding VLA sizes).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays so the reduction expressions below read/write through them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The caller stored the VLA size in the next array slot; bind it to the
      // size expression's OpaqueValueExpr before emitting the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  // Emit one combiner per reduction item under the privatized mapping.
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5522 
5523 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5524                                                   const Expr *ReductionOp,
5525                                                   const Expr *PrivateRef,
5526                                                   const DeclRefExpr *LHS,
5527                                                   const DeclRefExpr *RHS) {
5528   if (PrivateRef->getType()->isArrayType()) {
5529     // Emit reduction for array section.
5530     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5531     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5532     EmitOMPAggregateReduction(
5533         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5534         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5535           emitReductionCombiner(CGF, ReductionOp);
5536         });
5537   } else {
5538     // Emit reduction for array subscript or single variable.
5539     emitReductionCombiner(CGF, ReductionOp);
5540   }
5541 }
5542 
/// Emits code performing the OpenMP reduction described by \p ReductionOps
/// over the items in \p Privates, combining the thread-private copies
/// (\p RHSExprs) into the original variables (\p LHSExprs). Depending on
/// \p Options this is either a simple in-place combination or the full
/// __kmpc_reduce{_nowait} protocol with critical-section and atomic paths.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just run every combiner in sequence,
    // cleaning up any temporaries the combiners create.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size is smuggled through the void* slot as an inttoptr value;
      // emitReductionFunction reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // Wrap the combiners so that __kmpc_end_reduce{_nowait} is emitted on exit.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose an assignment-shaped combiner "x = <update>" into its
      // parts so it can be emitted as a simple atomic update.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // The update expression reads VD; redirect it to a temporary
                // holding the previously loaded value of x.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5849 
5850 /// Generates unique name for artificial threadprivate variables.
5851 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5852 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5853                                       const Expr *Ref) {
5854   SmallString<256> Buffer;
5855   llvm::raw_svector_ostream Out(Buffer);
5856   const clang::DeclRefExpr *DE;
5857   const VarDecl *D = ::getBaseDecl(Ref, DE);
5858   if (!D)
5859     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5860   D = D->getCanonicalDecl();
5861   std::string Name = CGM.getOpenMPRuntime().getName(
5862       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5863   Out << Prefix << Name << "_"
5864       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5865   return std::string(Out.str());
5866 }
5867 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy of the reduction item to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5931 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The lhs/rhs variables referenced by ReductionOp; remapped to the
  // function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6009 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the N-th reduction item requires no cleanups, in
/// which case the caller stores a null function pointer in the descriptor.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Trivially destructible items need no finalizer at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Single parameter: opaque void* pointer to the private reduction item.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  // The finalizer is internal to this TU; the runtime receives its address
  // through the kmp_taskred_input_t descriptor.
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6058 
/// Emits the kmp_taskred_input_t descriptor array for all reduction items in
/// \p Data (one element per reduction variable, each carrying the shared and
/// original addresses, the item size, and the init/fini/comb callbacks), then
/// calls __kmpc_taskred_modifier_init or __kmpc_taskred_init and returns the
/// taskgroup reduction descriptor produced by the runtime (or nullptr when
/// there is nothing to do).
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // The finalizer may be null when the item needs no cleanups; the
    // descriptor stores a null pointer in that case.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks the item for delayed creation (see comment above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6187 
6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189                                             SourceLocation Loc,
6190                                             bool IsWorksharingReduction) {
6191   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6192   // is_ws, int num, void *data);
6193   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195                                                 CGM.IntTy, /*isSigned=*/true);
6196   llvm::Value *Args[] = {IdentTLoc, GTid,
6197                          llvm::ConstantInt::get(CGM.IntTy,
6198                                                 IsWorksharingReduction ? 1 : 0,
6199                                                 /*isSigned=*/true)};
6200   (void)CGF.EmitRuntimeCall(
6201       OMPBuilder.getOrCreateRuntimeFunction(
6202           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203       Args);
6204 }
6205 
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second = nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222 
/// Returns the address of the thread-specific instance of a reduction item,
/// obtained from the runtime via the taskgroup reduction descriptor.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned address inherits the alignment of the shared lvalue.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6242 
/// Emits code for the 'taskwait' directive. Uses the OpenMPIRBuilder when it
/// is enabled and the directive carries no dependences; otherwise emits a
/// direct call to __kmpc_omp_wait_deps (with dependences) or
/// __kmpc_omp_taskwait (without).
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the dependence clause (if any) into the runtime's
    // kmp_depend_info_t array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependences: ndeps_noalias = 0, noalias_dep_list = null.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // When inside an OpenMP region, emit the untied-task switch so an untied
  // task can be resumed after this scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6293 
6294 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6295                                            OpenMPDirectiveKind InnerKind,
6296                                            const RegionCodeGenTy &CodeGen,
6297                                            bool HasCancel) {
6298   if (!CGF.HaveInsertPoint())
6299     return;
6300   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6301                                  InnerKind != OMPD_critical &&
6302                                      InnerKind != OMPD_master &&
6303                                      InnerKind != OMPD_masked);
6304   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6305 }
6306 
namespace {
/// Cancellation kinds passed as the cncl_kind argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
/// NOTE(review): the numeric values presumably mirror the OpenMP runtime's
/// cancel-kind encoding — keep them in sync with the runtime headers.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6316 
6317 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6318   RTCancelKind CancelKind = CancelNoreq;
6319   if (CancelRegion == OMPD_parallel)
6320     CancelKind = CancelParallel;
6321   else if (CancelRegion == OMPD_for)
6322     CancelKind = CancelLoop;
6323   else if (CancelRegion == OMPD_sections)
6324     CancelKind = CancelSections;
6325   else {
6326     assert(CancelRegion == OMPD_taskgroup);
6327     CancelKind = CancelTaskgroup;
6328   }
6329   return CancelKind;
6330 }
6331 
/// Emits code for the 'cancellation point' directive: queries the runtime and,
/// if cancellation was activated, branches out of the enclosing construct
/// (after a cancellation barrier for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result from the runtime means cancellation is active.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6371 
/// Emits code for the 'cancel' directive: calls __kmpc_cancel (guarded by the
/// 'if' clause condition when present) and, if cancellation was activated,
/// branches out of the enclosing construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The "then" code, emitted unconditionally or under the 'if' clause
    // condition. Captures by value because it may run in a nested CGF.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result from the runtime means cancellation is active.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause the else-branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6417 
6418 namespace {
6419 /// Cleanup action for uses_allocators support.
6420 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6421   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6422 
6423 public:
6424   OMPUsesAllocatorsActionTy(
6425       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6426       : Allocators(Allocators) {}
6427   void Enter(CodeGenFunction &CGF) override {
6428     if (!CGF.HaveInsertPoint())
6429       return;
6430     for (const auto &AllocatorData : Allocators) {
6431       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6432           CGF, AllocatorData.first, AllocatorData.second);
6433     }
6434   }
6435   void Exit(CodeGenFunction &CGF) override {
6436     if (!CGF.HaveInsertPoint())
6437       return;
6438     for (const auto &AllocatorData : Allocators) {
6439       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6440                                                         AllocatorData.first);
6441     }
6442   }
6443 };
6444 } // namespace
6445 
6446 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6447     const OMPExecutableDirective &D, StringRef ParentName,
6448     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6449     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6450   assert(!ParentName.empty() && "Invalid target region parent name!");
6451   HasEmittedTargetRegion = true;
6452   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6453   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6454     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6455       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6456       if (!D.AllocatorTraits)
6457         continue;
6458       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6459     }
6460   }
6461   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6462   CodeGen.setAction(UsesAllocatorAction);
6463   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6464                                    IsOffloadEntry, CodeGen);
6465 }
6466 
/// Initializes one allocator from a 'uses_allocators' clause: calls
/// __kmpc_init_allocator(gtid, memspace, ntraits, traits) and stores the
/// returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = extent of the traits expression's constant array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable's declaration before storing into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* handle to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6501 
6502 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6503                                              const Expr *Allocator) {
6504   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6505   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6506   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6507   llvm::Value *AllocatorVal =
6508       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6509   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6510                                           CGF.getContext().VoidPtrTy,
6511                                           Allocator->getExprLoc());
6512   (void)CGF.EmitRuntimeCall(
6513       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6514                                             OMPRTL___kmpc_destroy_allocator),
6515       {ThreadId, AllocatorVal});
6516 }
6517 
/// Outlines a target region into a uniquely-named entry function, registers
/// it as an offload entry when requested, and attaches num_teams /
/// thread_limit attributes derived from the directive's clauses.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement under the computed entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a dedicated zero-initialized byte serves as the unique ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6602 
6603 /// Checks if the expression is constant or does not have non-trivial function
6604 /// calls.
6605 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6606   // We can skip constant expressions.
6607   // We can skip expressions with trivial calls or simple expressions.
6608   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6609           !E->hasNonTrivialCall(Ctx)) &&
6610          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6611 }
6612 
/// Returns the single meaningful child statement of \p Body, looking through
/// compound statements and skipping trivial expressions, no-op statements and
/// purely declarative statements. Returns nullptr when more than one
/// meaningful child is found at any nesting level.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements as long as exactly one
  // meaningful child remains at each level.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // These declaration kinds are treated as ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused variables are also ignorable.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6654 
/// Returns the num_teams expression for the given target directive, if any,
/// and sets \p DefaultVal to the statically known team count when it can be
/// determined (0 = runtime default, 1 = single team, -1 = no teams region
/// needs to be emitted at all).
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look for a single nested directive to decide how
    // many teams are implied.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested 'teams' with a num_teams clause: use its expression; if it
        // is an integer constant also record the value in DefaultVal.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested 'teams' without num_teams: runtime decides (0).
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams forms: the clause, if present, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Combined target+parallel/simd forms imply exactly one team.
    DefaultVal = 1;
    return nullptr;
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; they are listed to keep the switch
  // exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6783 
6784 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6785     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6786   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6787          "Clauses associated with the teams directive expected to be emitted "
6788          "only for the host!");
6789   CGBuilderTy &Bld = CGF.Builder;
6790   int32_t DefaultNT = -1;
6791   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6792   if (NumTeams != nullptr) {
6793     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6794 
6795     switch (DirectiveKind) {
6796     case OMPD_target: {
6797       const auto *CS = D.getInnermostCapturedStmt();
6798       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6799       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6800       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6801                                                   /*IgnoreResultAssign*/ true);
6802       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6803                              /*isSigned=*/true);
6804     }
6805     case OMPD_target_teams:
6806     case OMPD_target_teams_distribute:
6807     case OMPD_target_teams_distribute_simd:
6808     case OMPD_target_teams_distribute_parallel_for:
6809     case OMPD_target_teams_distribute_parallel_for_simd: {
6810       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6811       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6812                                                   /*IgnoreResultAssign*/ true);
6813       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6814                              /*isSigned=*/true);
6815     }
6816     default:
6817       break;
6818     }
6819   } else if (DefaultNT == -1) {
6820     return nullptr;
6821   }
6822 
6823   return Bld.getInt32(DefaultNT);
6824 }
6825 
/// Computes the number of threads implied by a parallel/simd region nested
/// directly inside the captured statement \p CS, clamped by
/// \p DefaultThreadLimitVal when that is non-null. Returns
/// \p DefaultThreadLimitVal (possibly null) when no decision can be made, or
/// an i32 0 meaning "runtime default" when there is no nested directive.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if-clause that applies to 'parallel' (either unmodified or
        // with the 'parallel' name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Condition must be evaluated at runtime; emit any pre-init
            // declarations it depends on first.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate storage without emitting the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate storage without emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the default thread limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region runs with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6917 
/// Returns the thread-limit expression for the given target directive, if
/// any, and sets \p DefaultVal to the statically known thread count when it
/// can be determined from constant thread_limit/num_threads clauses.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      // Record the value in DefaultVal if it is an integer constant.
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads can constrain the thread count; when
    // both are integer constants the smaller one wins.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // num_threads is tighter than the recorded thread_limit: prefer it.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    DefaultVal = 1;
    return nullptr;
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; they are listed to keep the switch
  // exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7039 
/// Emits the number-of-threads value (as an i32) for the given target
/// directive on the host, combining thread_limit, num_threads, and if
/// clauses; an i32 0 means "use the runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the nested directives to derive the
    // thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive has to be emitted with
      // the captures of the inner region in scope.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate storage without emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a nested non-combined teams directive to look for a
      // distribute region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look through a nested plain 'distribute' for a parallel region.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Clamp num_threads to thread_limit (unsigned min) when both are given.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; they are listed to keep the switch
  // exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7258 
7259 namespace {
7260 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7261 
7262 // Utility to handle information from clauses associated with a given
7263 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7264 // It provides a convenient interface to obtain the information and generate
7265 // code for that information.
7266 class MappableExprsHandler {
7267 public:
7268   /// Values for bit flags used to specify the mapping type for
7269   /// offloading.
7270   enum OpenMPOffloadMappingFlags : uint64_t {
7271     /// No flags
7272     OMP_MAP_NONE = 0x0,
7273     /// Allocate memory on the device and move data from host to device.
7274     OMP_MAP_TO = 0x01,
7275     /// Allocate memory on the device and move data from device to host.
7276     OMP_MAP_FROM = 0x02,
7277     /// Always perform the requested mapping action on the element, even
7278     /// if it was already mapped before.
7279     OMP_MAP_ALWAYS = 0x04,
7280     /// Delete the element from the device environment, ignoring the
7281     /// current reference count associated with the element.
7282     OMP_MAP_DELETE = 0x08,
7283     /// The element being mapped is a pointer-pointee pair; both the
7284     /// pointer and the pointee should be mapped.
7285     OMP_MAP_PTR_AND_OBJ = 0x10,
7286     /// This flags signals that the base address of an entry should be
7287     /// passed to the target kernel as an argument.
7288     OMP_MAP_TARGET_PARAM = 0x20,
7289     /// Signal that the runtime library has to return the device pointer
7290     /// in the current position for the data being mapped. Used when we have the
7291     /// use_device_ptr or use_device_addr clause.
7292     OMP_MAP_RETURN_PARAM = 0x40,
7293     /// This flag signals that the reference being passed is a pointer to
7294     /// private data.
7295     OMP_MAP_PRIVATE = 0x80,
7296     /// Pass the element to the device by value.
7297     OMP_MAP_LITERAL = 0x100,
7298     /// Implicit map
7299     OMP_MAP_IMPLICIT = 0x200,
7300     /// Close is a hint to the runtime to allocate memory close to
7301     /// the target device.
7302     OMP_MAP_CLOSE = 0x400,
7303     /// 0x800 is reserved for compatibility with XLC.
7304     /// Produce a runtime error if the data is not already allocated.
7305     OMP_MAP_PRESENT = 0x1000,
7306     // Increment and decrement a separate reference counter so that the data
7307     // cannot be unmapped within the associated region.  Thus, this flag is
7308     // intended to be used on 'target' and 'target data' directives because they
7309     // are inherently structured.  It is not intended to be used on 'target
7310     // enter data' and 'target exit data' directives because they are inherently
7311     // dynamic.
7312     // This is an OpenMP extension for the sake of OpenACC support.
7313     OMP_MAP_OMPX_HOLD = 0x2000,
7314     /// Signal that the runtime library should use args as an array of
7315     /// descriptor_dim pointers and use args_size as dims. Used when we have
7316     /// non-contiguous list items in target update directive
7317     OMP_MAP_NON_CONTIG = 0x100000000000,
7318     /// The 16 MSBs of the flags indicate whether the entry is member of some
7319     /// struct/class.
7320     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7321     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7322   };
7323 
7324   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7325   static unsigned getFlagMemberOffset() {
7326     unsigned Offset = 0;
7327     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7328          Remain = Remain >> 1)
7329       Offset++;
7330     return Offset;
7331   }
7332 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Return the declaration the mapping refers to.
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Return the original map-clause expression (may be null).
    const Expr *getMapExpr() const { return MapExpr; }
  };
7349 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference to the underlying base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the device-pointer declaration (may be null).
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Attach a device-pointer declaration to this base pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7366 
7367   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7368   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7369   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7370   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7371   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7372   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7373   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7374 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    /// Per-entry dimension/offset/count/stride data describing non-contiguous
    /// list items (used for target update).
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    // The following arrays are parallel: element i of each describes the same
    // mapping entry.
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7413 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map entries gathered for the struct before its range is finalized.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: its field index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct itself.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range — TODO(review): confirm exact
    /// use at the emission site; not derivable from this declaration alone.
    Address LB = Address::invalid();
    /// True if the lowest element is an array section rather than a field.
    /// NOTE(review): inferred from the name — verify against the setter.
    bool IsArraySection = false;
    /// True when the complete record (not just a field range) is mapped.
    bool HasCompleteRecord = false;
  };
7429 
7430 private:
  /// Information extracted from a single map/motion clause for one component
  /// list: the components themselves plus the map type, modifiers, mapper and
  /// flags controlling how the entry is emitted.
  struct MapInfo {
    /// The expression components (base to final expression) being mapped.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (always, close, present, ompx_hold, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the device pointer for this entry must be returned.
    bool ReturnDevicePointer = false;
    /// True if the map was generated implicitly rather than written by the
    /// user.
    bool IsImplicit = false;
    /// User-defined mapper associated with the clause, if any.
    const ValueDecl *Mapper = nullptr;
    /// The variable-reference expression the clause applies to, if any.
    const Expr *VarRef = nullptr;
    /// True if this entry comes from use_device_addr / has_device_addr
    /// handling rather than a plain map.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7457 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression referencing the struct member.
    const Expr *IE = nullptr;
    /// The declaration the use_device_ptr/use_device_addr clause names.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7470 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that maps them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7494 
7495   llvm::Value *getExprTypeSize(const Expr *E) const {
7496     QualType ExprTy = E->getType().getCanonicalType();
7497 
7498     // Calculate the size for array shaping expression.
7499     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7500       llvm::Value *Size =
7501           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7502       for (const Expr *SE : OAE->getDimensions()) {
7503         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7504         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7505                                       CGF.getContext().getSizeType(),
7506                                       SE->getExprLoc());
7507         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7508       }
7509       return Size;
7510     }
7511 
7512     // Reference types are ignored for mapping purposes.
7513     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7514       ExprTy = RefTy->getPointeeType().getCanonicalType();
7515 
7516     // Given that an array section is considered a built-in type, we need to
7517     // do the calculation based on the length of the section instead of relying
7518     // on CGF.getTypeSize(E->getType()).
7519     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7520       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7521                             OAE->getBase()->IgnoreParenImpCasts())
7522                             .getCanonicalType();
7523 
7524       // If there is no length associated with the expression and lower bound is
7525       // not specified too, that means we are using the whole length of the
7526       // base.
7527       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7528           !OAE->getLowerBound())
7529         return CGF.getTypeSize(BaseTy);
7530 
7531       llvm::Value *ElemSize;
7532       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7533         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7534       } else {
7535         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7536         assert(ATy && "Expecting array type if not a pointer type.");
7537         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7538       }
7539 
7540       // If we don't have a length at this point, that is because we have an
7541       // array section with a single element.
7542       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7543         return ElemSize;
7544 
7545       if (const Expr *LenExpr = OAE->getLength()) {
7546         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7547         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7548                                              CGF.getContext().getSizeType(),
7549                                              LenExpr->getExprLoc());
7550         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7551       }
7552       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7553              OAE->getLowerBound() && "expected array_section[lb:].");
7554       // Size = sizetype - lb * elemtype;
7555       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7556       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7557       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7558                                        CGF.getContext().getSizeType(),
7559                                        OAE->getLowerBound()->getExprLoc());
7560       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7561       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7562       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7563       LengthVal = CGF.Builder.CreateSelect(
7564           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7565       return LengthVal;
7566     }
7567     return CGF.getTypeSize(ExprTy);
7568   }
7569 
7570   /// Return the corresponding bits for a given map clause modifier. Add
7571   /// a flag marking the map as a pointer if requested. Add a flag marking the
7572   /// map as the first one of a series of maps that relate to the same map
7573   /// expression.
7574   OpenMPOffloadMappingFlags getMapTypeBits(
7575       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7576       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7577       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7578     OpenMPOffloadMappingFlags Bits =
7579         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7580     switch (MapType) {
7581     case OMPC_MAP_alloc:
7582     case OMPC_MAP_release:
7583       // alloc and release is the default behavior in the runtime library,  i.e.
7584       // if we don't pass any bits alloc/release that is what the runtime is
7585       // going to do. Therefore, we don't need to signal anything for these two
7586       // type modifiers.
7587       break;
7588     case OMPC_MAP_to:
7589       Bits |= OMP_MAP_TO;
7590       break;
7591     case OMPC_MAP_from:
7592       Bits |= OMP_MAP_FROM;
7593       break;
7594     case OMPC_MAP_tofrom:
7595       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7596       break;
7597     case OMPC_MAP_delete:
7598       Bits |= OMP_MAP_DELETE;
7599       break;
7600     case OMPC_MAP_unknown:
7601       llvm_unreachable("Unexpected map type!");
7602     }
7603     if (AddPtrFlag)
7604       Bits |= OMP_MAP_PTR_AND_OBJ;
7605     if (AddIsTargetParamFlag)
7606       Bits |= OMP_MAP_TARGET_PARAM;
7607     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7608       Bits |= OMP_MAP_ALWAYS;
7609     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7610       Bits |= OMP_MAP_CLOSE;
7611     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7612         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7613       Bits |= OMP_MAP_PRESENT;
7614     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7615       Bits |= OMP_MAP_OMPX_HOLD;
7616     if (IsNonContiguous)
7617       Bits |= OMP_MAP_NON_CONTIG;
7618     return Bits;
7619   }
7620 
7621   /// Return true if the provided expression is a final array section. A
7622   /// final array section, is one whose length can't be proved to be one.
7623   bool isFinalArraySectionExpression(const Expr *E) const {
7624     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7625 
7626     // It is not an array section and therefore not a unity-size one.
7627     if (!OASE)
7628       return false;
7629 
7630     // An array section with no colon always refer to a single element.
7631     if (OASE->getColonLocFirst().isInvalid())
7632       return false;
7633 
7634     const Expr *Length = OASE->getLength();
7635 
7636     // If we don't have a length we have to check if the array has size 1
7637     // for this dimension. Also, we should always expect a length if the
7638     // base type is pointer.
7639     if (!Length) {
7640       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7641                              OASE->getBase()->IgnoreParenImpCasts())
7642                              .getCanonicalType();
7643       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7644         return ATy->getSize().getSExtValue() != 1;
7645       // If we don't have a constant dimension length, we have to consider
7646       // the current section as having any size, so it is not necessarily
7647       // unitary. If it happen to be unity size, that's user fault.
7648       return true;
7649     }
7650 
7651     // Check if the length evaluates to 1.
7652     Expr::EvalResult Result;
7653     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7654       return true; // Can have more that size 1.
7655 
7656     llvm::APSInt ConstLength = Result.Val.getInt();
7657     return ConstLength.getSExtValue() != 1;
7658   }
7659 
7660   /// Generate the base pointers, section pointers, sizes, map type bits, and
7661   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7662   /// map type, map or motion modifiers, and expression components.
7663   /// \a IsFirstComponent should be set to true if the provided set of
7664   /// components is the first associated with a capture.
7665   void generateInfoForComponentList(
7666       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7667       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7668       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7669       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7670       bool IsFirstComponentList, bool IsImplicit,
7671       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7672       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7673       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7674           OverlappedElements = llvm::None) const {
7675     // The following summarizes what has to be generated for each map and the
7676     // types below. The generated information is expressed in this order:
7677     // base pointer, section pointer, size, flags
7678     // (to add to the ones that come from the map type and modifier).
7679     //
7680     // double d;
7681     // int i[100];
7682     // float *p;
7683     //
7684     // struct S1 {
7685     //   int i;
7686     //   float f[50];
7687     // }
7688     // struct S2 {
7689     //   int i;
7690     //   float f[50];
7691     //   S1 s;
7692     //   double *p;
7693     //   struct S2 *ps;
7694     //   int &ref;
7695     // }
7696     // S2 s;
7697     // S2 *ps;
7698     //
7699     // map(d)
7700     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7701     //
7702     // map(i)
7703     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7704     //
7705     // map(i[1:23])
7706     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7707     //
7708     // map(p)
7709     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7710     //
7711     // map(p[1:24])
7712     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7713     // in unified shared memory mode or for local pointers
7714     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7715     //
7716     // map(s)
7717     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7718     //
7719     // map(s.i)
7720     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7721     //
7722     // map(s.s.f)
7723     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7724     //
7725     // map(s.p)
7726     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7727     //
7728     // map(to: s.p[:22])
7729     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7730     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7731     // &(s.p), &(s.p[0]), 22*sizeof(double),
7732     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7733     // (*) alloc space for struct members, only this is a target parameter
7734     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7735     //      optimizes this entry out, same in the examples below)
7736     // (***) map the pointee (map: to)
7737     //
7738     // map(to: s.ref)
7739     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7740     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7741     // (*) alloc space for struct members, only this is a target parameter
7742     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7743     //      optimizes this entry out, same in the examples below)
7744     // (***) map the pointee (map: to)
7745     //
7746     // map(s.ps)
7747     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7748     //
7749     // map(from: s.ps->s.i)
7750     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7751     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7752     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7753     //
7754     // map(to: s.ps->ps)
7755     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7756     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7757     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7758     //
7759     // map(s.ps->ps->ps)
7760     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7761     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7762     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7763     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7764     //
7765     // map(to: s.ps->ps->s.f[:22])
7766     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7767     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7768     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7769     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7770     //
7771     // map(ps)
7772     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7773     //
7774     // map(ps->i)
7775     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7776     //
7777     // map(ps->s.f)
7778     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7779     //
7780     // map(from: ps->p)
7781     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7782     //
7783     // map(to: ps->p[:22])
7784     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7785     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7786     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7787     //
7788     // map(ps->ps)
7789     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7790     //
7791     // map(from: ps->ps->s.i)
7792     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7793     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7794     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7795     //
7796     // map(from: ps->ps->ps)
7797     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7798     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7799     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7800     //
7801     // map(ps->ps->ps->ps)
7802     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7803     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7804     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7805     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7806     //
7807     // map(to: ps->ps->ps->s.f[:22])
7808     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7809     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7810     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7811     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7812     //
7813     // map(to: s.f[:22]) map(from: s.p[:33])
7814     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7815     //     sizeof(double*) (**), TARGET_PARAM
7816     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7817     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7818     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7819     // (*) allocate contiguous space needed to fit all mapped members even if
7820     //     we allocate space for members not mapped (in this example,
7821     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7822     //     them as well because they fall between &s.f[0] and &s.p)
7823     //
7824     // map(from: s.f[:22]) map(to: ps->p[:33])
7825     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7826     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7827     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7828     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7829     // (*) the struct this entry pertains to is the 2nd element in the list of
7830     //     arguments, hence MEMBER_OF(2)
7831     //
7832     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7833     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7834     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7835     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7836     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7837     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7838     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7839     // (*) the struct this entry pertains to is the 4th element in the list
7840     //     of arguments, hence MEMBER_OF(4)
7841 
7842     // Track if the map information being generated is the first for a capture.
7843     bool IsCaptureFirstInfo = IsFirstComponentList;
7844     // When the variable is on a declare target link or in a to clause with
7845     // unified memory, a reference is needed to hold the host/device address
7846     // of the variable.
7847     bool RequiresReference = false;
7848 
7849     // Scan the components from the base to the complete expression.
7850     auto CI = Components.rbegin();
7851     auto CE = Components.rend();
7852     auto I = CI;
7853 
7854     // Track if the map information being generated is the first for a list of
7855     // components.
7856     bool IsExpressionFirstInfo = true;
7857     bool FirstPointerInComplexData = false;
7858     Address BP = Address::invalid();
7859     const Expr *AssocExpr = I->getAssociatedExpression();
7860     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7861     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7862     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7863 
7864     if (isa<MemberExpr>(AssocExpr)) {
7865       // The base is the 'this' pointer. The content of the pointer is going
7866       // to be the base of the field being mapped.
7867       BP = CGF.LoadCXXThisAddress();
7868     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7869                (OASE &&
7870                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7871       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7872     } else if (OAShE &&
7873                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7874       BP = Address(
7875           CGF.EmitScalarExpr(OAShE->getBase()),
7876           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7877     } else {
7878       // The base is the reference to the variable.
7879       // BP = &Var.
7880       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7881       if (const auto *VD =
7882               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7883         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7884                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7885           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7886               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7887                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7888             RequiresReference = true;
7889             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7890           }
7891         }
7892       }
7893 
7894       // If the variable is a pointer and is being dereferenced (i.e. is not
7895       // the last component), the base has to be the pointer itself, not its
7896       // reference. References are ignored for mapping purposes.
7897       QualType Ty =
7898           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7899       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7900         // No need to generate individual map information for the pointer, it
7901         // can be associated with the combined storage if shared memory mode is
7902         // active or the base declaration is not global variable.
7903         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7904         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7905             !VD || VD->hasLocalStorage())
7906           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7907         else
7908           FirstPointerInComplexData = true;
7909         ++I;
7910       }
7911     }
7912 
7913     // Track whether a component of the list should be marked as MEMBER_OF some
7914     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7915     // in a component list should be marked as MEMBER_OF, all subsequent entries
7916     // do not belong to the base struct. E.g.
7917     // struct S2 s;
7918     // s.ps->ps->ps->f[:]
7919     //   (1) (2) (3) (4)
7920     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7921     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7922     // is the pointee of ps(2) which is not member of struct s, so it should not
7923     // be marked as such (it is still PTR_AND_OBJ).
7924     // The variable is initialized to false so that PTR_AND_OBJ entries which
7925     // are not struct members are not considered (e.g. array of pointers to
7926     // data).
7927     bool ShouldBeMemberOf = false;
7928 
7929     // Variable keeping track of whether or not we have encountered a component
7930     // in the component list which is a member expression. Useful when we have a
7931     // pointer or a final array section, in which case it is the previous
7932     // component in the list which tells us whether we have a member expression.
7933     // E.g. X.f[:]
7934     // While processing the final array section "[:]" it is "f" which tells us
7935     // whether we are dealing with a member of a declared struct.
7936     const MemberExpr *EncounteredME = nullptr;
7937 
7938     // Track for the total number of dimension. Start from one for the dummy
7939     // dimension.
7940     uint64_t DimSize = 1;
7941 
7942     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7943     bool IsPrevMemberReference = false;
7944 
7945     for (; I != CE; ++I) {
7946       // If the current component is member of a struct (parent struct) mark it.
7947       if (!EncounteredME) {
7948         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7949         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7950         // as MEMBER_OF the parent struct.
7951         if (EncounteredME) {
7952           ShouldBeMemberOf = true;
7953           // Do not emit as complex pointer if this is actually not array-like
7954           // expression.
7955           if (FirstPointerInComplexData) {
7956             QualType Ty = std::prev(I)
7957                               ->getAssociatedDeclaration()
7958                               ->getType()
7959                               .getNonReferenceType();
7960             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7961             FirstPointerInComplexData = false;
7962           }
7963         }
7964       }
7965 
7966       auto Next = std::next(I);
7967 
7968       // We need to generate the addresses and sizes if this is the last
7969       // component, if the component is a pointer or if it is an array section
7970       // whose length can't be proved to be one. If this is a pointer, it
7971       // becomes the base address for the following components.
7972 
7973       // A final array section, is one whose length can't be proved to be one.
7974       // If the map item is non-contiguous then we don't treat any array section
7975       // as final array section.
7976       bool IsFinalArraySection =
7977           !IsNonContiguous &&
7978           isFinalArraySectionExpression(I->getAssociatedExpression());
7979 
7980       // If we have a declaration for the mapping use that, otherwise use
7981       // the base declaration of the map clause.
7982       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7983                                      ? I->getAssociatedDeclaration()
7984                                      : BaseDecl;
7985       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7986                                                : MapExpr;
7987 
7988       // Get information on whether the element is a pointer. Have to do a
7989       // special treatment for array sections given that they are built-in
7990       // types.
7991       const auto *OASE =
7992           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7993       const auto *OAShE =
7994           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7995       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7996       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7997       bool IsPointer =
7998           OAShE ||
7999           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8000                        .getCanonicalType()
8001                        ->isAnyPointerType()) ||
8002           I->getAssociatedExpression()->getType()->isAnyPointerType();
8003       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8004                                MapDecl &&
8005                                MapDecl->getType()->isLValueReferenceType();
8006       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8007 
8008       if (OASE)
8009         ++DimSize;
8010 
8011       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8012           IsFinalArraySection) {
8013         // If this is not the last component, we expect the pointer to be
8014         // associated with an array expression or member expression.
8015         assert((Next == CE ||
8016                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8017                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8018                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8019                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8020                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8021                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8022                "Unexpected expression");
8023 
8024         Address LB = Address::invalid();
8025         Address LowestElem = Address::invalid();
8026         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8027                                        const MemberExpr *E) {
8028           const Expr *BaseExpr = E->getBase();
8029           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8030           // scalar.
8031           LValue BaseLV;
8032           if (E->isArrow()) {
8033             LValueBaseInfo BaseInfo;
8034             TBAAAccessInfo TBAAInfo;
8035             Address Addr =
8036                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8037             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8038             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8039           } else {
8040             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8041           }
8042           return BaseLV;
8043         };
8044         if (OAShE) {
8045           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8046                                     CGF.getContext().getTypeAlignInChars(
8047                                         OAShE->getBase()->getType()));
8048         } else if (IsMemberReference) {
8049           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8050           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8051           LowestElem = CGF.EmitLValueForFieldInitialization(
8052                               BaseLVal, cast<FieldDecl>(MapDecl))
8053                            .getAddress(CGF);
8054           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8055                    .getAddress(CGF);
8056         } else {
8057           LowestElem = LB =
8058               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8059                   .getAddress(CGF);
8060         }
8061 
8062         // If this component is a pointer inside the base struct then we don't
8063         // need to create any entry for it - it will be combined with the object
8064         // it is pointing to into a single PTR_AND_OBJ entry.
8065         bool IsMemberPointerOrAddr =
8066             EncounteredME &&
8067             (((IsPointer || ForDeviceAddr) &&
8068               I->getAssociatedExpression() == EncounteredME) ||
8069              (IsPrevMemberReference && !IsPointer) ||
8070              (IsMemberReference && Next != CE &&
8071               !Next->getAssociatedExpression()->getType()->isPointerType()));
8072         if (!OverlappedElements.empty() && Next == CE) {
8073           // Handle base element with the info for overlapped elements.
8074           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8075           assert(!IsPointer &&
8076                  "Unexpected base element with the pointer type.");
8077           // Mark the whole struct as the struct that requires allocation on the
8078           // device.
8079           PartialStruct.LowestElem = {0, LowestElem};
8080           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8081               I->getAssociatedExpression()->getType());
8082           Address HB = CGF.Builder.CreateConstGEP(
8083               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8084                                                               CGF.VoidPtrTy),
8085               TypeSize.getQuantity() - 1);
8086           PartialStruct.HighestElem = {
8087               std::numeric_limits<decltype(
8088                   PartialStruct.HighestElem.first)>::max(),
8089               HB};
8090           PartialStruct.Base = BP;
8091           PartialStruct.LB = LB;
8092           assert(
8093               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8094               "Overlapped elements must be used only once for the variable.");
8095           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8096           // Emit data for non-overlapped data.
8097           OpenMPOffloadMappingFlags Flags =
8098               OMP_MAP_MEMBER_OF |
8099               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8100                              /*AddPtrFlag=*/false,
8101                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8102           llvm::Value *Size = nullptr;
8103           // Do bitcopy of all non-overlapped structure elements.
8104           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8105                    Component : OverlappedElements) {
8106             Address ComponentLB = Address::invalid();
8107             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8108                  Component) {
8109               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8110                 const auto *FD = dyn_cast<FieldDecl>(VD);
8111                 if (FD && FD->getType()->isLValueReferenceType()) {
8112                   const auto *ME =
8113                       cast<MemberExpr>(MC.getAssociatedExpression());
8114                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8115                   ComponentLB =
8116                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8117                           .getAddress(CGF);
8118                 } else {
8119                   ComponentLB =
8120                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8121                           .getAddress(CGF);
8122                 }
8123                 Size = CGF.Builder.CreatePtrDiff(
8124                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8125                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8126                 break;
8127               }
8128             }
8129             assert(Size && "Failed to determine structure size");
8130             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8131             CombinedInfo.BasePointers.push_back(BP.getPointer());
8132             CombinedInfo.Pointers.push_back(LB.getPointer());
8133             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8134                 Size, CGF.Int64Ty, /*isSigned=*/true));
8135             CombinedInfo.Types.push_back(Flags);
8136             CombinedInfo.Mappers.push_back(nullptr);
8137             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8138                                                                       : 1);
8139             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8140           }
8141           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8142           CombinedInfo.BasePointers.push_back(BP.getPointer());
8143           CombinedInfo.Pointers.push_back(LB.getPointer());
8144           Size = CGF.Builder.CreatePtrDiff(
8145               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8146               CGF.EmitCastToVoidPtr(LB.getPointer()));
8147           CombinedInfo.Sizes.push_back(
8148               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8149           CombinedInfo.Types.push_back(Flags);
8150           CombinedInfo.Mappers.push_back(nullptr);
8151           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8152                                                                     : 1);
8153           break;
8154         }
8155         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8156         if (!IsMemberPointerOrAddr ||
8157             (Next == CE && MapType != OMPC_MAP_unknown)) {
8158           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8159           CombinedInfo.BasePointers.push_back(BP.getPointer());
8160           CombinedInfo.Pointers.push_back(LB.getPointer());
8161           CombinedInfo.Sizes.push_back(
8162               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8163           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8164                                                                     : 1);
8165 
8166           // If Mapper is valid, the last component inherits the mapper.
8167           bool HasMapper = Mapper && Next == CE;
8168           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8169 
8170           // We need to add a pointer flag for each map that comes from the
8171           // same expression except for the first one. We also need to signal
8172           // this map is the first one that relates with the current capture
8173           // (there is a set of entries for each capture).
8174           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8175               MapType, MapModifiers, MotionModifiers, IsImplicit,
8176               !IsExpressionFirstInfo || RequiresReference ||
8177                   FirstPointerInComplexData || IsMemberReference,
8178               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8179 
8180           if (!IsExpressionFirstInfo || IsMemberReference) {
8181             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8182             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8183             if (IsPointer || (IsMemberReference && Next != CE))
8184               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8185                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8186 
8187             if (ShouldBeMemberOf) {
8188               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8189               // should be later updated with the correct value of MEMBER_OF.
8190               Flags |= OMP_MAP_MEMBER_OF;
8191               // From now on, all subsequent PTR_AND_OBJ entries should not be
8192               // marked as MEMBER_OF.
8193               ShouldBeMemberOf = false;
8194             }
8195           }
8196 
8197           CombinedInfo.Types.push_back(Flags);
8198         }
8199 
8200         // If we have encountered a member expression so far, keep track of the
8201         // mapped member. If the parent is "*this", then the value declaration
8202         // is nullptr.
8203         if (EncounteredME) {
8204           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8205           unsigned FieldIndex = FD->getFieldIndex();
8206 
8207           // Update info about the lowest and highest elements for this struct
8208           if (!PartialStruct.Base.isValid()) {
8209             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8210             if (IsFinalArraySection) {
8211               Address HB =
8212                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8213                       .getAddress(CGF);
8214               PartialStruct.HighestElem = {FieldIndex, HB};
8215             } else {
8216               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8217             }
8218             PartialStruct.Base = BP;
8219             PartialStruct.LB = BP;
8220           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8221             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8222           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8223             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8224           }
8225         }
8226 
8227         // Need to emit combined struct for array sections.
8228         if (IsFinalArraySection || IsNonContiguous)
8229           PartialStruct.IsArraySection = true;
8230 
8231         // If we have a final array section, we are done with this expression.
8232         if (IsFinalArraySection)
8233           break;
8234 
8235         // The pointer becomes the base for the next element.
8236         if (Next != CE)
8237           BP = IsMemberReference ? LowestElem : LB;
8238 
8239         IsExpressionFirstInfo = false;
8240         IsCaptureFirstInfo = false;
8241         FirstPointerInComplexData = false;
8242         IsPrevMemberReference = IsMemberReference;
8243       } else if (FirstPointerInComplexData) {
8244         QualType Ty = Components.rbegin()
8245                           ->getAssociatedDeclaration()
8246                           ->getType()
8247                           .getNonReferenceType();
8248         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8249         FirstPointerInComplexData = false;
8250       }
8251     }
8252     // If ran into the whole component - allocate the space for the whole
8253     // record.
8254     if (!EncounteredME)
8255       PartialStruct.HasCompleteRecord = true;
8256 
8257     if (!IsNonContiguous)
8258       return;
8259 
8260     const ASTContext &Context = CGF.getContext();
8261 
8262     // For supporting stride in array section, we need to initialize the first
8263     // dimension size as 1, first offset as 0, and first count as 1
8264     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8265     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8266     MapValuesArrayTy CurStrides;
8267     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8268     uint64_t ElementTypeSize;
8269 
8270     // Collect Size information for each dimension and get the element size as
8271     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8272     // should be [10, 10] and the first stride is 4 btyes.
8273     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8274          Components) {
8275       const Expr *AssocExpr = Component.getAssociatedExpression();
8276       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8277 
8278       if (!OASE)
8279         continue;
8280 
8281       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8282       auto *CAT = Context.getAsConstantArrayType(Ty);
8283       auto *VAT = Context.getAsVariableArrayType(Ty);
8284 
8285       // We need all the dimension size except for the last dimension.
8286       assert((VAT || CAT || &Component == &*Components.begin()) &&
8287              "Should be either ConstantArray or VariableArray if not the "
8288              "first Component");
8289 
8290       // Get element size if CurStrides is empty.
8291       if (CurStrides.empty()) {
8292         const Type *ElementType = nullptr;
8293         if (CAT)
8294           ElementType = CAT->getElementType().getTypePtr();
8295         else if (VAT)
8296           ElementType = VAT->getElementType().getTypePtr();
8297         else
8298           assert(&Component == &*Components.begin() &&
8299                  "Only expect pointer (non CAT or VAT) when this is the "
8300                  "first Component");
8301         // If ElementType is null, then it means the base is a pointer
8302         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8303         // for next iteration.
8304         if (ElementType) {
8305           // For the case that having pointer as base, we need to remove one
8306           // level of indirection.
8307           if (&Component != &*Components.begin())
8308             ElementType = ElementType->getPointeeOrArrayElementType();
8309           ElementTypeSize =
8310               Context.getTypeSizeInChars(ElementType).getQuantity();
8311           CurStrides.push_back(
8312               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8313         }
8314       }
8315       // Get dimension value except for the last dimension since we don't need
8316       // it.
8317       if (DimSizes.size() < Components.size() - 1) {
8318         if (CAT)
8319           DimSizes.push_back(llvm::ConstantInt::get(
8320               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8321         else if (VAT)
8322           DimSizes.push_back(CGF.Builder.CreateIntCast(
8323               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8324               /*IsSigned=*/false));
8325       }
8326     }
8327 
8328     // Skip the dummy dimension since we have already have its information.
8329     auto DI = DimSizes.begin() + 1;
8330     // Product of dimension.
8331     llvm::Value *DimProd =
8332         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8333 
8334     // Collect info for non-contiguous. Notice that offset, count, and stride
8335     // are only meaningful for array-section, so we insert a null for anything
8336     // other than array-section.
8337     // Also, the size of offset, count, and stride are not the same as
8338     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8339     // count, and stride are the same as the number of non-contiguous
8340     // declaration in target update to/from clause.
8341     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8342          Components) {
8343       const Expr *AssocExpr = Component.getAssociatedExpression();
8344 
8345       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8346         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8347             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8348             /*isSigned=*/false);
8349         CurOffsets.push_back(Offset);
8350         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8351         CurStrides.push_back(CurStrides.back());
8352         continue;
8353       }
8354 
8355       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8356 
8357       if (!OASE)
8358         continue;
8359 
8360       // Offset
8361       const Expr *OffsetExpr = OASE->getLowerBound();
8362       llvm::Value *Offset = nullptr;
8363       if (!OffsetExpr) {
8364         // If offset is absent, then we just set it to zero.
8365         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8366       } else {
8367         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8368                                            CGF.Int64Ty,
8369                                            /*isSigned=*/false);
8370       }
8371       CurOffsets.push_back(Offset);
8372 
8373       // Count
8374       const Expr *CountExpr = OASE->getLength();
8375       llvm::Value *Count = nullptr;
8376       if (!CountExpr) {
8377         // In Clang, once a high dimension is an array section, we construct all
8378         // the lower dimension as array section, however, for case like
8379         // arr[0:2][2], Clang construct the inner dimension as an array section
8380         // but it actually is not in an array section form according to spec.
8381         if (!OASE->getColonLocFirst().isValid() &&
8382             !OASE->getColonLocSecond().isValid()) {
8383           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8384         } else {
8385           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8386           // When the length is absent it defaults to ⌈(size −
8387           // lower-bound)/stride⌉, where size is the size of the array
8388           // dimension.
8389           const Expr *StrideExpr = OASE->getStride();
8390           llvm::Value *Stride =
8391               StrideExpr
8392                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8393                                               CGF.Int64Ty, /*isSigned=*/false)
8394                   : nullptr;
8395           if (Stride)
8396             Count = CGF.Builder.CreateUDiv(
8397                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8398           else
8399             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8400         }
8401       } else {
8402         Count = CGF.EmitScalarExpr(CountExpr);
8403       }
8404       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8405       CurCounts.push_back(Count);
8406 
8407       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8408       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8409       //              Offset      Count     Stride
8410       //    D0          0           1         4    (int)    <- dummy dimension
8411       //    D1          0           2         8    (2 * (1) * 4)
8412       //    D2          1           2         20   (1 * (1 * 5) * 4)
8413       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8414       const Expr *StrideExpr = OASE->getStride();
8415       llvm::Value *Stride =
8416           StrideExpr
8417               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8418                                           CGF.Int64Ty, /*isSigned=*/false)
8419               : nullptr;
8420       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8421       if (Stride)
8422         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8423       else
8424         CurStrides.push_back(DimProd);
8425       if (DI != DimSizes.end())
8426         ++DI;
8427     }
8428 
8429     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8430     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8431     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8432   }
8433 
8434   /// Return the adjusted map modifiers if the declaration a capture refers to
8435   /// appears in a first-private clause. This is expected to be used only with
8436   /// directives that start with 'target'.
8437   MappableExprsHandler::OpenMPOffloadMappingFlags
8438   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8439     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8440 
8441     // A first private variable captured by reference will use only the
8442     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8443     // declaration is known as first-private in this handler.
8444     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8445       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8446         return MappableExprsHandler::OMP_MAP_TO |
8447                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8448       return MappableExprsHandler::OMP_MAP_PRIVATE |
8449              MappableExprsHandler::OMP_MAP_TO;
8450     }
8451     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8452     if (I != LambdasMap.end())
8453       // for map(to: lambda): using user specified map type.
8454       return getMapTypeBits(
8455           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8456           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8457           /*AddPtrFlag=*/false,
8458           /*AddIsTargetParamFlag=*/false,
8459           /*isNonContiguous=*/false);
8460     return MappableExprsHandler::OMP_MAP_TO |
8461            MappableExprsHandler::OMP_MAP_FROM;
8462   }
8463 
8464   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8465     // Rotate by getFlagMemberOffset() bits.
8466     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8467                                                   << getFlagMemberOffset());
8468   }
8469 
8470   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8471                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8472     // If the entry is PTR_AND_OBJ but has not been marked with the special
8473     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8474     // marked as MEMBER_OF.
8475     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8476         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8477       return;
8478 
8479     // Reset the placeholder value to prepare the flag for the assignment of the
8480     // proper MEMBER_OF value.
8481     Flags &= ~OMP_MAP_MEMBER_OF;
8482     Flags |= MemberOfFlag;
8483   }
8484 
8485   void getPlainLayout(const CXXRecordDecl *RD,
8486                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8487                       bool AsBase) const {
8488     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8489 
8490     llvm::StructType *St =
8491         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8492 
8493     unsigned NumElements = St->getNumElements();
8494     llvm::SmallVector<
8495         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8496         RecordLayout(NumElements);
8497 
8498     // Fill bases.
8499     for (const auto &I : RD->bases()) {
8500       if (I.isVirtual())
8501         continue;
8502       const auto *Base = I.getType()->getAsCXXRecordDecl();
8503       // Ignore empty bases.
8504       if (Base->isEmpty() || CGF.getContext()
8505                                  .getASTRecordLayout(Base)
8506                                  .getNonVirtualSize()
8507                                  .isZero())
8508         continue;
8509 
8510       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8511       RecordLayout[FieldIndex] = Base;
8512     }
8513     // Fill in virtual bases.
8514     for (const auto &I : RD->vbases()) {
8515       const auto *Base = I.getType()->getAsCXXRecordDecl();
8516       // Ignore empty bases.
8517       if (Base->isEmpty())
8518         continue;
8519       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8520       if (RecordLayout[FieldIndex])
8521         continue;
8522       RecordLayout[FieldIndex] = Base;
8523     }
8524     // Fill in all the fields.
8525     assert(!RD->isUnion() && "Unexpected union.");
8526     for (const auto *Field : RD->fields()) {
8527       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8528       // will fill in later.)
8529       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8530         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8531         RecordLayout[FieldIndex] = Field;
8532       }
8533     }
8534     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8535              &Data : RecordLayout) {
8536       if (Data.isNull())
8537         continue;
8538       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8539         getPlainLayout(Base, Layout, /*AsBase=*/true);
8540       else
8541         Layout.push_back(Data.get<const FieldDecl *>());
8542     }
8543   }
8544 
8545   /// Generate all the base pointers, section pointers, sizes, map types, and
8546   /// mappers for the extracted mappable expressions (all included in \a
8547   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8548   /// pair of the relevant declaration and index where it occurs is appended to
8549   /// the device pointers info array.
8550   void generateAllInfoForClauses(
8551       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8552       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8553           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8554     // We have to process the component lists that relate with the same
8555     // declaration in a single chunk so that we can generate the map flags
8556     // correctly. Therefore, we organize all lists in a map.
8557     enum MapKind { Present, Allocs, Other, Total };
8558     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8559                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8560         Info;
8561 
8562     // Helper function to fill the information map for the different supported
8563     // clauses.
8564     auto &&InfoGen =
8565         [&Info, &SkipVarSet](
8566             const ValueDecl *D, MapKind Kind,
8567             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8568             OpenMPMapClauseKind MapType,
8569             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8570             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8571             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8572             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8573           if (SkipVarSet.contains(D))
8574             return;
8575           auto It = Info.find(D);
8576           if (It == Info.end())
8577             It = Info
8578                      .insert(std::make_pair(
8579                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8580                      .first;
8581           It->second[Kind].emplace_back(
8582               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8583               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8584         };
8585 
8586     for (const auto *Cl : Clauses) {
8587       const auto *C = dyn_cast<OMPMapClause>(Cl);
8588       if (!C)
8589         continue;
8590       MapKind Kind = Other;
8591       if (llvm::is_contained(C->getMapTypeModifiers(),
8592                              OMPC_MAP_MODIFIER_present))
8593         Kind = Present;
8594       else if (C->getMapType() == OMPC_MAP_alloc)
8595         Kind = Allocs;
8596       const auto *EI = C->getVarRefs().begin();
8597       for (const auto L : C->component_lists()) {
8598         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8599         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8600                 C->getMapTypeModifiers(), llvm::None,
8601                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8602                 E);
8603         ++EI;
8604       }
8605     }
8606     for (const auto *Cl : Clauses) {
8607       const auto *C = dyn_cast<OMPToClause>(Cl);
8608       if (!C)
8609         continue;
8610       MapKind Kind = Other;
8611       if (llvm::is_contained(C->getMotionModifiers(),
8612                              OMPC_MOTION_MODIFIER_present))
8613         Kind = Present;
8614       const auto *EI = C->getVarRefs().begin();
8615       for (const auto L : C->component_lists()) {
8616         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8617                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8618                 C->isImplicit(), std::get<2>(L), *EI);
8619         ++EI;
8620       }
8621     }
8622     for (const auto *Cl : Clauses) {
8623       const auto *C = dyn_cast<OMPFromClause>(Cl);
8624       if (!C)
8625         continue;
8626       MapKind Kind = Other;
8627       if (llvm::is_contained(C->getMotionModifiers(),
8628                              OMPC_MOTION_MODIFIER_present))
8629         Kind = Present;
8630       const auto *EI = C->getVarRefs().begin();
8631       for (const auto L : C->component_lists()) {
8632         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8633                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8634                 C->isImplicit(), std::get<2>(L), *EI);
8635         ++EI;
8636       }
8637     }
8638 
8639     // Look at the use_device_ptr clause information and mark the existing map
8640     // entries as such. If there is no map information for an entry in the
8641     // use_device_ptr list, we create one with map type 'alloc' and zero size
8642     // section. It is the user fault if that was not mapped before. If there is
8643     // no map information and the pointer is a struct member, then we defer the
8644     // emission of that entry until the whole struct has been processed.
8645     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8646                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8647         DeferredInfo;
8648     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8649 
8650     for (const auto *Cl : Clauses) {
8651       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8652       if (!C)
8653         continue;
8654       for (const auto L : C->component_lists()) {
8655         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8656             std::get<1>(L);
8657         assert(!Components.empty() &&
8658                "Not expecting empty list of components!");
8659         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8660         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8661         const Expr *IE = Components.back().getAssociatedExpression();
8662         // If the first component is a member expression, we have to look into
8663         // 'this', which maps to null in the map of map information. Otherwise
8664         // look directly for the information.
8665         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8666 
8667         // We potentially have map information for this declaration already.
8668         // Look for the first set of components that refer to it.
8669         if (It != Info.end()) {
8670           bool Found = false;
8671           for (auto &Data : It->second) {
8672             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8673               return MI.Components.back().getAssociatedDeclaration() == VD;
8674             });
8675             // If we found a map entry, signal that the pointer has to be
8676             // returned and move on to the next declaration. Exclude cases where
8677             // the base pointer is mapped as array subscript, array section or
8678             // array shaping. The base address is passed as a pointer to base in
8679             // this case and cannot be used as a base for use_device_ptr list
8680             // item.
8681             if (CI != Data.end()) {
8682               auto PrevCI = std::next(CI->Components.rbegin());
8683               const auto *VarD = dyn_cast<VarDecl>(VD);
8684               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8685                   isa<MemberExpr>(IE) ||
8686                   !VD->getType().getNonReferenceType()->isPointerType() ||
8687                   PrevCI == CI->Components.rend() ||
8688                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8689                   VarD->hasLocalStorage()) {
8690                 CI->ReturnDevicePointer = true;
8691                 Found = true;
8692                 break;
8693               }
8694             }
8695           }
8696           if (Found)
8697             continue;
8698         }
8699 
8700         // We didn't find any match in our map information - generate a zero
8701         // size array section - if the pointer is a struct member we defer this
8702         // action until the whole struct has been processed.
8703         if (isa<MemberExpr>(IE)) {
8704           // Insert the pointer into Info to be processed by
8705           // generateInfoForComponentList. Because it is a member pointer
8706           // without a pointee, no entry will be generated for it, therefore
8707           // we need to generate one after the whole struct has been processed.
8708           // Nonetheless, generateInfoForComponentList must be called to take
8709           // the pointer into account for the calculation of the range of the
8710           // partial struct.
8711           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8712                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8713                   nullptr);
8714           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8715         } else {
8716           llvm::Value *Ptr =
8717               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8718           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8719           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8720           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8721           UseDevicePtrCombinedInfo.Sizes.push_back(
8722               llvm::Constant::getNullValue(CGF.Int64Ty));
8723           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8724           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8725         }
8726       }
8727     }
8728 
8729     // Look at the use_device_addr clause information and mark the existing map
8730     // entries as such. If there is no map information for an entry in the
8731     // use_device_addr list, we create one with map type 'alloc' and zero size
8732     // section. It is the user fault if that was not mapped before. If there is
8733     // no map information and the pointer is a struct member, then we defer the
8734     // emission of that entry until the whole struct has been processed.
8735     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8736     for (const auto *Cl : Clauses) {
8737       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8738       if (!C)
8739         continue;
8740       for (const auto L : C->component_lists()) {
8741         assert(!std::get<1>(L).empty() &&
8742                "Not expecting empty list of components!");
8743         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8744         if (!Processed.insert(VD).second)
8745           continue;
8746         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8747         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8748         // If the first component is a member expression, we have to look into
8749         // 'this', which maps to null in the map of map information. Otherwise
8750         // look directly for the information.
8751         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8752 
8753         // We potentially have map information for this declaration already.
8754         // Look for the first set of components that refer to it.
8755         if (It != Info.end()) {
8756           bool Found = false;
8757           for (auto &Data : It->second) {
8758             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8759               return MI.Components.back().getAssociatedDeclaration() == VD;
8760             });
8761             // If we found a map entry, signal that the pointer has to be
8762             // returned and move on to the next declaration.
8763             if (CI != Data.end()) {
8764               CI->ReturnDevicePointer = true;
8765               Found = true;
8766               break;
8767             }
8768           }
8769           if (Found)
8770             continue;
8771         }
8772 
8773         // We didn't find any match in our map information - generate a zero
8774         // size array section - if the pointer is a struct member we defer this
8775         // action until the whole struct has been processed.
8776         if (isa<MemberExpr>(IE)) {
8777           // Insert the pointer into Info to be processed by
8778           // generateInfoForComponentList. Because it is a member pointer
8779           // without a pointee, no entry will be generated for it, therefore
8780           // we need to generate one after the whole struct has been processed.
8781           // Nonetheless, generateInfoForComponentList must be called to take
8782           // the pointer into account for the calculation of the range of the
8783           // partial struct.
8784           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8785                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8786                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8787           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8788         } else {
8789           llvm::Value *Ptr;
8790           if (IE->isGLValue())
8791             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8792           else
8793             Ptr = CGF.EmitScalarExpr(IE);
8794           CombinedInfo.Exprs.push_back(VD);
8795           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8796           CombinedInfo.Pointers.push_back(Ptr);
8797           CombinedInfo.Sizes.push_back(
8798               llvm::Constant::getNullValue(CGF.Int64Ty));
8799           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8800           CombinedInfo.Mappers.push_back(nullptr);
8801         }
8802       }
8803     }
8804 
8805     for (const auto &Data : Info) {
8806       StructRangeInfoTy PartialStruct;
8807       // Temporary generated information.
8808       MapCombinedInfoTy CurInfo;
8809       const Decl *D = Data.first;
8810       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8811       for (const auto &M : Data.second) {
8812         for (const MapInfo &L : M) {
8813           assert(!L.Components.empty() &&
8814                  "Not expecting declaration with no component lists.");
8815 
8816           // Remember the current base pointer index.
8817           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8818           CurInfo.NonContigInfo.IsNonContiguous =
8819               L.Components.back().isNonContiguous();
8820           generateInfoForComponentList(
8821               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8822               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8823               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8824 
8825           // If this entry relates with a device pointer, set the relevant
8826           // declaration and add the 'return pointer' flag.
8827           if (L.ReturnDevicePointer) {
8828             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8829                    "Unexpected number of mapped base pointers.");
8830 
8831             const ValueDecl *RelevantVD =
8832                 L.Components.back().getAssociatedDeclaration();
8833             assert(RelevantVD &&
8834                    "No relevant declaration related with device pointer??");
8835 
8836             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8837                 RelevantVD);
8838             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8839           }
8840         }
8841       }
8842 
8843       // Append any pending zero-length pointers which are struct members and
8844       // used with use_device_ptr or use_device_addr.
8845       auto CI = DeferredInfo.find(Data.first);
8846       if (CI != DeferredInfo.end()) {
8847         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8848           llvm::Value *BasePtr;
8849           llvm::Value *Ptr;
8850           if (L.ForDeviceAddr) {
8851             if (L.IE->isGLValue())
8852               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8853             else
8854               Ptr = this->CGF.EmitScalarExpr(L.IE);
8855             BasePtr = Ptr;
8856             // Entry is RETURN_PARAM. Also, set the placeholder value
8857             // MEMBER_OF=FFFF so that the entry is later updated with the
8858             // correct value of MEMBER_OF.
8859             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8860           } else {
8861             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8862             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8863                                              L.IE->getExprLoc());
8864             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8865             // placeholder value MEMBER_OF=FFFF so that the entry is later
8866             // updated with the correct value of MEMBER_OF.
8867             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8868                                     OMP_MAP_MEMBER_OF);
8869           }
8870           CurInfo.Exprs.push_back(L.VD);
8871           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8872           CurInfo.Pointers.push_back(Ptr);
8873           CurInfo.Sizes.push_back(
8874               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8875           CurInfo.Mappers.push_back(nullptr);
8876         }
8877       }
8878       // If there is an entry in PartialStruct it means we have a struct with
8879       // individual members mapped. Emit an extra combined entry.
8880       if (PartialStruct.Base.isValid()) {
8881         CurInfo.NonContigInfo.Dims.push_back(0);
8882         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8883       }
8884 
8885       // We need to append the results of this capture to what we already
8886       // have.
8887       CombinedInfo.append(CurInfo);
8888     }
8889     // Append data for use_device_ptr clauses.
8890     CombinedInfo.append(UseDevicePtrCombinedInfo);
8891   }
8892 
8893 public:
8894   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8895       : CurDir(&Dir), CGF(CGF) {
8896     // Extract firstprivate clause information.
8897     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8898       for (const auto *D : C->varlists())
8899         FirstPrivateDecls.try_emplace(
8900             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8901     // Extract implicit firstprivates from uses_allocators clauses.
8902     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8903       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8904         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8905         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8906           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8907                                         /*Implicit=*/true);
8908         else if (const auto *VD = dyn_cast<VarDecl>(
8909                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8910                          ->getDecl()))
8911           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8912       }
8913     }
8914     // Extract device pointer clause information.
8915     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8916       for (auto L : C->component_lists())
8917         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8918     // Extract map information.
8919     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8920       if (C->getMapType() != OMPC_MAP_to)
8921         continue;
8922       for (auto L : C->component_lists()) {
8923         const ValueDecl *VD = std::get<0>(L);
8924         const auto *RD = VD ? VD->getType()
8925                                   .getCanonicalType()
8926                                   .getNonReferenceType()
8927                                   ->getAsCXXRecordDecl()
8928                             : nullptr;
8929         if (RD && RD->isLambda())
8930           LambdasMap.try_emplace(std::get<0>(L), C);
8931       }
8932     }
8933   }
8934 
  /// Constructor for the declare mapper directive. Unlike the constructor for
  /// executable directives above, it only stores the directive and the
  /// CodeGenFunction; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8938 
8939   /// Generate code for the combined entry if we have a partially mapped struct
8940   /// and take care of the mapping flags of the arguments corresponding to
8941   /// individual struct members.
8942   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8943                          MapFlagsArrayTy &CurTypes,
8944                          const StructRangeInfoTy &PartialStruct,
8945                          const ValueDecl *VD = nullptr,
8946                          bool NotTargetParams = true) const {
8947     if (CurTypes.size() == 1 &&
8948         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8949         !PartialStruct.IsArraySection)
8950       return;
8951     Address LBAddr = PartialStruct.LowestElem.second;
8952     Address HBAddr = PartialStruct.HighestElem.second;
8953     if (PartialStruct.HasCompleteRecord) {
8954       LBAddr = PartialStruct.LB;
8955       HBAddr = PartialStruct.LB;
8956     }
8957     CombinedInfo.Exprs.push_back(VD);
8958     // Base is the base of the struct
8959     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8960     // Pointer is the address of the lowest element
8961     llvm::Value *LB = LBAddr.getPointer();
8962     CombinedInfo.Pointers.push_back(LB);
8963     // There should not be a mapper for a combined entry.
8964     CombinedInfo.Mappers.push_back(nullptr);
8965     // Size is (addr of {highest+1} element) - (addr of lowest element)
8966     llvm::Value *HB = HBAddr.getPointer();
8967     llvm::Value *HAddr =
8968         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8969     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8970     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8971     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8972     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8973                                                   /*isSigned=*/false);
8974     CombinedInfo.Sizes.push_back(Size);
8975     // Map type is always TARGET_PARAM, if generate info for captures.
8976     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8977                                                  : OMP_MAP_TARGET_PARAM);
8978     // If any element has the present modifier, then make sure the runtime
8979     // doesn't attempt to allocate the struct.
8980     if (CurTypes.end() !=
8981         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8982           return Type & OMP_MAP_PRESENT;
8983         }))
8984       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8985     // Remove TARGET_PARAM flag from the first element
8986     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8987     // If any element has the ompx_hold modifier, then make sure the runtime
8988     // uses the hold reference count for the struct as a whole so that it won't
8989     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8990     // elements as well so the runtime knows which reference count to check
8991     // when determining whether it's time for device-to-host transfers of
8992     // individual elements.
8993     if (CurTypes.end() !=
8994         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8995           return Type & OMP_MAP_OMPX_HOLD;
8996         })) {
8997       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8998       for (auto &M : CurTypes)
8999         M |= OMP_MAP_OMPX_HOLD;
9000     }
9001 
9002     // All other current entries will be MEMBER_OF the combined entry
9003     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9004     // 0xFFFF in the MEMBER_OF field).
9005     OpenMPOffloadMappingFlags MemberOfFlag =
9006         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9007     for (auto &M : CurTypes)
9008       setCorrectMemberOfFlag(M, MemberOfFlag);
9009   }
9010 
9011   /// Generate all the base pointers, section pointers, sizes, map types, and
9012   /// mappers for the extracted mappable expressions (all included in \a
9013   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9014   /// pair of the relevant declaration and index where it occurs is appended to
9015   /// the device pointers info array.
9016   void generateAllInfo(
9017       MapCombinedInfoTy &CombinedInfo,
9018       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9019           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9020     assert(CurDir.is<const OMPExecutableDirective *>() &&
9021            "Expect a executable directive");
9022     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9023     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9024   }
9025 
9026   /// Generate all the base pointers, section pointers, sizes, map types, and
9027   /// mappers for the extracted map clauses of user-defined mapper (all included
9028   /// in \a CombinedInfo).
9029   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9030     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9031            "Expect a declare mapper directive");
9032     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9033     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9034   }
9035 
9036   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambdas need this treatment: bail out unless VD's (non-reference)
    // type is a lambda closure class.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    // Build an lvalue for the closure object out of the captured argument.
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the closure's capture fields (plus the 'this' capture, if any).
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit an entry for the captured 'this': base is the capture field
      // inside the closure, pointee is the stored 'this' pointer value.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember which closure object this field belongs to so that
      // adjustMemberOfForLambdaCaptures can fix up MEMBER_OF later.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      // Size of a pointer (void*), cast to the runtime's i64 size type.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Only by-reference captures and captured pointers need map entries.
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object itself; size is the
        // size of the captured variable's non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9102 
9103   /// Set correct indices for lambdas captures.
9104   void adjustMemberOfForLambdaCaptures(
9105       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9106       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9107       MapFlagsArrayTy &Types) const {
9108     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9109       // Set correct member_of idx for all implicit lambda captures.
9110       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9111                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9112         continue;
9113       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9114       assert(BasePtr && "Unable to find base lambda address.");
9115       int TgtIdx = -1;
9116       for (unsigned J = I; J > 0; --J) {
9117         unsigned Idx = J - 1;
9118         if (Pointers[Idx] != BasePtr)
9119           continue;
9120         TgtIdx = Idx;
9121         break;
9122       }
9123       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9124       // All other current entries will be MEMBER_OF the combined entry
9125       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9126       // 0xFFFF in the MEMBER_OF field).
9127       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9128       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9129     }
9130   }
9131 
9132   /// Generate the base pointers, section pointers, sizes, map types, and
9133   /// mappers associated to a given capture (all included in \a CombinedInfo).
9134   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9135                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9136                               StructRangeInfoTy &PartialStruct) const {
9137     assert(!Cap->capturesVariableArrayType() &&
9138            "Not expecting to generate map info for a variable array type!");
9139 
9140     // We need to know when we generating information for the first component
9141     const ValueDecl *VD = Cap->capturesThis()
9142                               ? nullptr
9143                               : Cap->getCapturedVar()->getCanonicalDecl();
9144 
9145     // for map(to: lambda): skip here, processing it in
9146     // generateDefaultMapInfo
9147     if (LambdasMap.count(VD))
9148       return;
9149 
9150     // If this declaration appears in a is_device_ptr clause we just have to
9151     // pass the pointer by value. If it is a reference to a declaration, we just
9152     // pass its value.
9153     if (DevPointersMap.count(VD)) {
9154       CombinedInfo.Exprs.push_back(VD);
9155       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9156       CombinedInfo.Pointers.push_back(Arg);
9157       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9158           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9159           /*isSigned=*/true));
9160       CombinedInfo.Types.push_back(
9161           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9162           OMP_MAP_TARGET_PARAM);
9163       CombinedInfo.Mappers.push_back(nullptr);
9164       return;
9165     }
9166 
9167     using MapData =
9168         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9169                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9170                    const ValueDecl *, const Expr *>;
9171     SmallVector<MapData, 4> DeclComponentLists;
9172     assert(CurDir.is<const OMPExecutableDirective *>() &&
9173            "Expect a executable directive");
9174     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9175     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9176       const auto *EI = C->getVarRefs().begin();
9177       for (const auto L : C->decl_component_lists(VD)) {
9178         const ValueDecl *VDecl, *Mapper;
9179         // The Expression is not correct if the mapping is implicit
9180         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9181         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9182         std::tie(VDecl, Components, Mapper) = L;
9183         assert(VDecl == VD && "We got information for the wrong declaration??");
9184         assert(!Components.empty() &&
9185                "Not expecting declaration with no component lists.");
9186         DeclComponentLists.emplace_back(Components, C->getMapType(),
9187                                         C->getMapTypeModifiers(),
9188                                         C->isImplicit(), Mapper, E);
9189         ++EI;
9190       }
9191     }
9192     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9193                                              const MapData &RHS) {
9194       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9195       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9196       bool HasPresent =
9197           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9198       bool HasAllocs = MapType == OMPC_MAP_alloc;
9199       MapModifiers = std::get<2>(RHS);
9200       MapType = std::get<1>(LHS);
9201       bool HasPresentR =
9202           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9203       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9204       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9205     });
9206 
9207     // Find overlapping elements (including the offset from the base element).
9208     llvm::SmallDenseMap<
9209         const MapData *,
9210         llvm::SmallVector<
9211             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9212         4>
9213         OverlappedData;
9214     size_t Count = 0;
9215     for (const MapData &L : DeclComponentLists) {
9216       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9217       OpenMPMapClauseKind MapType;
9218       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9219       bool IsImplicit;
9220       const ValueDecl *Mapper;
9221       const Expr *VarRef;
9222       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9223           L;
9224       ++Count;
9225       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9226         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9227         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9228                  VarRef) = L1;
9229         auto CI = Components.rbegin();
9230         auto CE = Components.rend();
9231         auto SI = Components1.rbegin();
9232         auto SE = Components1.rend();
9233         for (; CI != CE && SI != SE; ++CI, ++SI) {
9234           if (CI->getAssociatedExpression()->getStmtClass() !=
9235               SI->getAssociatedExpression()->getStmtClass())
9236             break;
9237           // Are we dealing with different variables/fields?
9238           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9239             break;
9240         }
9241         // Found overlapping if, at least for one component, reached the head
9242         // of the components list.
9243         if (CI == CE || SI == SE) {
9244           // Ignore it if it is the same component.
9245           if (CI == CE && SI == SE)
9246             continue;
9247           const auto It = (SI == SE) ? CI : SI;
9248           // If one component is a pointer and another one is a kind of
9249           // dereference of this pointer (array subscript, section, dereference,
9250           // etc.), it is not an overlapping.
9251           // Same, if one component is a base and another component is a
9252           // dereferenced pointer memberexpr with the same base.
9253           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9254               (std::prev(It)->getAssociatedDeclaration() &&
9255                std::prev(It)
9256                    ->getAssociatedDeclaration()
9257                    ->getType()
9258                    ->isPointerType()) ||
9259               (It->getAssociatedDeclaration() &&
9260                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9261                std::next(It) != CE && std::next(It) != SE))
9262             continue;
9263           const MapData &BaseData = CI == CE ? L : L1;
9264           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9265               SI == SE ? Components : Components1;
9266           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9267           OverlappedElements.getSecond().push_back(SubData);
9268         }
9269       }
9270     }
9271     // Sort the overlapped elements for each item.
9272     llvm::SmallVector<const FieldDecl *, 4> Layout;
9273     if (!OverlappedData.empty()) {
9274       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9275       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9276       while (BaseType != OrigType) {
9277         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9278         OrigType = BaseType->getPointeeOrArrayElementType();
9279       }
9280 
9281       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9282         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9283       else {
9284         const auto *RD = BaseType->getAsRecordDecl();
9285         Layout.append(RD->field_begin(), RD->field_end());
9286       }
9287     }
9288     for (auto &Pair : OverlappedData) {
9289       llvm::stable_sort(
9290           Pair.getSecond(),
9291           [&Layout](
9292               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9293               OMPClauseMappableExprCommon::MappableExprComponentListRef
9294                   Second) {
9295             auto CI = First.rbegin();
9296             auto CE = First.rend();
9297             auto SI = Second.rbegin();
9298             auto SE = Second.rend();
9299             for (; CI != CE && SI != SE; ++CI, ++SI) {
9300               if (CI->getAssociatedExpression()->getStmtClass() !=
9301                   SI->getAssociatedExpression()->getStmtClass())
9302                 break;
9303               // Are we dealing with different variables/fields?
9304               if (CI->getAssociatedDeclaration() !=
9305                   SI->getAssociatedDeclaration())
9306                 break;
9307             }
9308 
9309             // Lists contain the same elements.
9310             if (CI == CE && SI == SE)
9311               return false;
9312 
9313             // List with less elements is less than list with more elements.
9314             if (CI == CE || SI == SE)
9315               return CI == CE;
9316 
9317             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9318             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9319             if (FD1->getParent() == FD2->getParent())
9320               return FD1->getFieldIndex() < FD2->getFieldIndex();
9321             const auto *It =
9322                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9323                   return FD == FD1 || FD == FD2;
9324                 });
9325             return *It == FD1;
9326           });
9327     }
9328 
9329     // Associated with a capture, because the mapping flags depend on it.
9330     // Go through all of the elements with the overlapped elements.
9331     bool IsFirstComponentList = true;
9332     for (const auto &Pair : OverlappedData) {
9333       const MapData &L = *Pair.getFirst();
9334       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9335       OpenMPMapClauseKind MapType;
9336       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9337       bool IsImplicit;
9338       const ValueDecl *Mapper;
9339       const Expr *VarRef;
9340       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9341           L;
9342       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9343           OverlappedComponents = Pair.getSecond();
9344       generateInfoForComponentList(
9345           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9346           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9347           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9348       IsFirstComponentList = false;
9349     }
9350     // Go through other elements without overlapped elements.
9351     for (const MapData &L : DeclComponentLists) {
9352       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9353       OpenMPMapClauseKind MapType;
9354       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9355       bool IsImplicit;
9356       const ValueDecl *Mapper;
9357       const Expr *VarRef;
9358       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9359           L;
9360       auto It = OverlappedData.find(&L);
9361       if (It == OverlappedData.end())
9362         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9363                                      Components, CombinedInfo, PartialStruct,
9364                                      IsFirstComponentList, IsImplicit, Mapper,
9365                                      /*ForDeviceAddr=*/false, VD, VarRef);
9366       IsFirstComponentList = false;
9367     }
9368   }
9369 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the parallel vectors in
  /// \p CombinedInfo (Exprs, BasePointers, Pointers, Sizes, Types, Mappers).
  /// The entry is always flagged as a target parameter and, unless a
  /// firstprivate clause recorded otherwise in FirstPrivateDecls, as an
  /// implicit map. No user-defined mapper is attached for default mappings.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of `this`: map the pointed-to object both to and from the
      // device; the size is the size of the pointee type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override the implicit-map flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // Capture by reference: map the referenced element type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // For a firstprivate pointer, the value to map is the pointee loaded
        // through the captured reference, not the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9442 };
9443 } // anonymous namespace
9444 
/// Emit, for each non-contiguous mapped base, a stack array of
/// `struct descriptor_dim { uint64_t offset, count, stride; }` entries and
/// store the address of that array into the corresponding slot of
/// \p Info.PointersArray so the runtime can reconstruct the transfer.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices into descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill one descriptor per dimension. Note the reversed index: slot II
    // receives the data for dimension EE - II - 1.
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9512 
9513 // Try to extract the base declaration from a `this->x` expression if possible.
9514 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9515   if (!E)
9516     return nullptr;
9517 
9518   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9519     if (const MemberExpr *ME =
9520             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9521       return ME->getMemberDecl();
9522   return nullptr;
9523 }
9524 
9525 /// Emit a string constant containing the names of the values mapped to the
9526 /// offloading runtime library.
9527 llvm::Constant *
9528 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9529                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9530 
9531   uint32_t SrcLocStrSize;
9532   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9533     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9534 
9535   SourceLocation Loc;
9536   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9537     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9538       Loc = VD->getLocation();
9539     else
9540       Loc = MapExprs.getMapExpr()->getExprLoc();
9541   } else {
9542     Loc = MapExprs.getMapDecl()->getLocation();
9543   }
9544 
9545   std::string ExprName;
9546   if (MapExprs.getMapExpr()) {
9547     PrintingPolicy P(CGF.getContext().getLangOpts());
9548     llvm::raw_string_ostream OS(ExprName);
9549     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9550     OS.flush();
9551   } else {
9552     ExprName = MapExprs.getMapDecl()->getNameAsString();
9553   }
9554 
9555   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9556   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9557                                          PLoc.getLine(), PLoc.getColumn(),
9558                                          SrcLocStrSize);
9559 }
9560 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates \p Info with: the base-pointers/pointers/mappers stack arrays,
/// a sizes array (stack array when any size needs runtime evaluation, else a
/// constant global), constant map-type arrays (a separate end-of-region copy
/// when PRESENT modifiers must be dropped), and — when debug info is enabled —
/// a map-names array. Finally emits non-contiguous descriptors if requested.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one name string per mapped expression via emitMappingInformation.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entry slots of the base-pointer, pointer, (runtime) size,
    // and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device pointer for a use_device_ptr/addr decl lives.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored per-entry when the stack array variant is used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only emitted when requested and there is
  // actually something to describe.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9735 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Emit map types for the end of the region instead of the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9744 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each out-parameter receives a pointer to the first element of the
/// corresponding array in \p Info; when there are no pointers at all, every
/// out-parameter is set to an appropriately-typed null pointer instead.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each [N x T] array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // Use the end-of-region map types when they differ (PRESENT dropped) and
    // this is the end call; otherwise use the regular map types.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No entries: pass typed null pointers for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9805 
/// Check for inner distribute directive.
///
/// For a `target` or `target teams` directive \p D, returns the nested
/// `distribute`-class directive if one is the single child of the captured
/// body (for plain `target`, also looking one level deeper through a nested
/// `teams`). Returns nullptr for all other target directives or when no such
/// child exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper: target -> teams -> distribute.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Target directives that cannot contain a nested distribute.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not valid here; reaching them
    // indicates a caller bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9915 
9916 /// Emit the user-defined mapper function. The code generation follows the
9917 /// pattern in the example below.
9918 /// \code
9919 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9920 ///                                           void *base, void *begin,
9921 ///                                           int64_t size, int64_t type,
9922 ///                                           void *name = nullptr) {
9923 ///   // Allocate space for an array section first or add a base/begin for
9924 ///   // pointer dereference.
9925 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9926 ///       !maptype.IsDelete)
9927 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9928 ///                                 size*sizeof(Ty), clearToFromMember(type));
9929 ///   // Map members.
9930 ///   for (unsigned i = 0; i < size; i++) {
9931 ///     // For each component specified by this mapper:
9932 ///     for (auto c : begin[i]->all_components) {
9933 ///       if (c.hasMapper())
9934 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9935 ///                       c.arg_type, c.arg_name);
9936 ///       else
9937 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9938 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9939 ///                                     c.arg_name);
9940 ///     }
9941 ///   }
9942 ///   // Delete the array section.
9943 ///   if (size > 1 && maptype.IsDelete)
9944 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9945 ///                                 size*sizeof(Ty), clearToFromMember(type));
9946 /// }
9947 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each 'declare mapper' declaration gets exactly one function; bail out if
  // it has already been emitted.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Elements are accessed through a restrict-qualified pointer to the mapped
  // type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so that the mapper body refers to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature is:
  //   void .omp_mapper.<type>.<id>(void *handle, void *base, void *begin,
  //                                int64_t size, int64_t type, void *name)
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Mangle the mapped type into the function name so mappers for distinct
  // types (or distinct mapper ids) do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when the TU is built at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that falls through to the loop latch; the body
  // below emits more blocks, so it is updated before wiring the back edge.
  llvm::BasicBlock *LastBB = BodyBB;
  // Pointer to the element currently being mapped (loop induction variable).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  // Alignment of an element inside the array, derived from the alignment of
  // the incoming 'begin' argument.
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map types. The 'tofrom' (unchanged) value flows
    // in from ToElseBB via the fall-through edge of the IsFrom branch.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back edge comes from the last block emitted inside
  // the body, not necessarily from BodyBB itself.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the function so subsequent requests reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Remember that this mapper was emitted while generating CGF->CurFn so
    // the association can be cleaned up with that function.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10196 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // size > 1 means an array section rather than a single element.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialize when mapping an array section, or when dereferencing a
    // pointer to an object (base != begin with PTR_AND_OBJ set) -- but never
    // when the DELETE bit is present.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion applies only to array sections and only when the DELETE bit
    // is set in the map type.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10264 
10265 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10266     const OMPDeclareMapperDecl *D) {
10267   auto I = UDMMap.find(D);
10268   if (I != UDMMap.end())
10269     return I->second;
10270   emitUserDefinedMapper(D);
10271   return UDMMap.lookup(D);
10272 }
10273 
10274 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10275     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10276     llvm::Value *DeviceID,
10277     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10278                                      const OMPLoopDirective &D)>
10279         SizeEmitter) {
10280   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10281   const OMPExecutableDirective *TD = &D;
10282   // Get nested teams distribute kind directive, if any.
10283   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10284     TD = getNestedDistributeDirective(CGM.getContext(), D);
10285   if (!TD)
10286     return;
10287   const auto *LD = cast<OMPLoopDirective>(TD);
10288   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10289                                                          PrePostActionTy &) {
10290     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10291       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10292       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10293       CGF.EmitRuntimeCall(
10294           OMPBuilder.getOrCreateRuntimeFunction(
10295               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10296           Args);
10297     }
10298   };
10299   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10300 }
10301 
10302 void CGOpenMPRuntime::emitTargetCall(
10303     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10304     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10305     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10306     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10307                                      const OMPLoopDirective &D)>
10308         SizeEmitter) {
10309   if (!CGF.HaveInsertPoint())
10310     return;
10311 
10312   assert(OutlinedFn && "Invalid outlined function!");
10313 
10314   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10315                                  D.hasClausesOfKind<OMPNowaitClause>();
10316   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10317   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10318   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10319                                             PrePostActionTy &) {
10320     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10321   };
10322   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10323 
10324   CodeGenFunction::OMPTargetDataInfo InputInfo;
10325   llvm::Value *MapTypesArray = nullptr;
10326   llvm::Value *MapNamesArray = nullptr;
10327   // Fill up the pointer arrays and transfer execution to the device.
10328   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10329                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10330                     &CapturedVars,
10331                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10332     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10333       // Reverse offloading is not supported, so just execute on the host.
10334       if (RequiresOuterTask) {
10335         CapturedVars.clear();
10336         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10337       }
10338       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10339       return;
10340     }
10341 
10342     // On top of the arrays that were filled up, the target offloading call
10343     // takes as arguments the device id as well as the host pointer. The host
10344     // pointer is used by the runtime library to identify the current target
10345     // region, so it only has to be unique and not necessarily point to
10346     // anything. It could be the pointer to the outlined function that
10347     // implements the target region, but we aren't using that so that the
10348     // compiler doesn't need to keep that, and could therefore inline the host
10349     // function if proven worthwhile during optimization.
10350 
10351     // From this point on, we need to have an ID of the target region defined.
10352     assert(OutlinedFnID && "Invalid outlined function ID!");
10353 
10354     // Emit device ID if any.
10355     llvm::Value *DeviceID;
10356     if (Device.getPointer()) {
10357       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10358               Device.getInt() == OMPC_DEVICE_device_num) &&
10359              "Expected device_num modifier.");
10360       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10361       DeviceID =
10362           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10363     } else {
10364       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10365     }
10366 
10367     // Emit the number of elements in the offloading arrays.
10368     llvm::Value *PointerNum =
10369         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10370 
10371     // Return value of the runtime offloading call.
10372     llvm::Value *Return;
10373 
10374     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10375     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10376 
10377     // Source location for the ident struct
10378     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10379 
10380     // Emit tripcount for the target loop-based directive.
10381     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10382 
10383     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10384     // The target region is an outlined function launched by the runtime
10385     // via calls __tgt_target() or __tgt_target_teams().
10386     //
10387     // __tgt_target() launches a target region with one team and one thread,
10388     // executing a serial region.  This master thread may in turn launch
10389     // more threads within its team upon encountering a parallel region,
10390     // however, no additional teams can be launched on the device.
10391     //
10392     // __tgt_target_teams() launches a target region with one or more teams,
10393     // each with one or more threads.  This call is required for target
10394     // constructs such as:
10395     //  'target teams'
10396     //  'target' / 'teams'
10397     //  'target teams distribute parallel for'
10398     //  'target parallel'
10399     // and so on.
10400     //
10401     // Note that on the host and CPU targets, the runtime implementation of
10402     // these calls simply call the outlined function without forking threads.
10403     // The outlined functions themselves have runtime calls to
10404     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10405     // the compiler in emitTeamsCall() and emitParallelCall().
10406     //
10407     // In contrast, on the NVPTX target, the implementation of
10408     // __tgt_target_teams() launches a GPU kernel with the requested number
10409     // of teams and threads so no additional calls to the runtime are required.
10410     if (NumTeams) {
10411       // If we have NumTeams defined this means that we have an enclosed teams
10412       // region. Therefore we also expect to have NumThreads defined. These two
10413       // values should be defined in the presence of a teams directive,
10414       // regardless of having any clauses associated. If the user is using teams
10415       // but no clauses, these two values will be the default that should be
10416       // passed to the runtime library - a 32-bit integer with the value zero.
10417       assert(NumThreads && "Thread limit expression should be available along "
10418                            "with number of teams.");
10419       SmallVector<llvm::Value *> OffloadingArgs = {
10420           RTLoc,
10421           DeviceID,
10422           OutlinedFnID,
10423           PointerNum,
10424           InputInfo.BasePointersArray.getPointer(),
10425           InputInfo.PointersArray.getPointer(),
10426           InputInfo.SizesArray.getPointer(),
10427           MapTypesArray,
10428           MapNamesArray,
10429           InputInfo.MappersArray.getPointer(),
10430           NumTeams,
10431           NumThreads};
10432       if (HasNowait) {
10433         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10434         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10435         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10436         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10437         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10438         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10439       }
10440       Return = CGF.EmitRuntimeCall(
10441           OMPBuilder.getOrCreateRuntimeFunction(
10442               CGM.getModule(), HasNowait
10443                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10444                                    : OMPRTL___tgt_target_teams_mapper),
10445           OffloadingArgs);
10446     } else {
10447       SmallVector<llvm::Value *> OffloadingArgs = {
10448           RTLoc,
10449           DeviceID,
10450           OutlinedFnID,
10451           PointerNum,
10452           InputInfo.BasePointersArray.getPointer(),
10453           InputInfo.PointersArray.getPointer(),
10454           InputInfo.SizesArray.getPointer(),
10455           MapTypesArray,
10456           MapNamesArray,
10457           InputInfo.MappersArray.getPointer()};
10458       if (HasNowait) {
10459         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10460         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10461         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10462         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10463         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10464         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10465       }
10466       Return = CGF.EmitRuntimeCall(
10467           OMPBuilder.getOrCreateRuntimeFunction(
10468               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10469                                          : OMPRTL___tgt_target_mapper),
10470           OffloadingArgs);
10471     }
10472 
10473     // Check the error code and execute the host version if required.
10474     llvm::BasicBlock *OffloadFailedBlock =
10475         CGF.createBasicBlock("omp_offload.failed");
10476     llvm::BasicBlock *OffloadContBlock =
10477         CGF.createBasicBlock("omp_offload.cont");
10478     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10479     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10480 
10481     CGF.EmitBlock(OffloadFailedBlock);
10482     if (RequiresOuterTask) {
10483       CapturedVars.clear();
10484       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10485     }
10486     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10487     CGF.EmitBranch(OffloadContBlock);
10488 
10489     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10490   };
10491 
10492   // Notify that the host version must be executed.
10493   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10494                     RequiresOuterTask](CodeGenFunction &CGF,
10495                                        PrePostActionTy &) {
10496     if (RequiresOuterTask) {
10497       CapturedVars.clear();
10498       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10499     }
10500     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10501   };
10502 
10503   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10504                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10505                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10506     // Fill up the arrays with all the captured variables.
10507     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10508 
10509     // Get mappable expression information.
10510     MappableExprsHandler MEHandler(D, CGF);
10511     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10512     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10513 
10514     auto RI = CS.getCapturedRecordDecl()->field_begin();
10515     auto *CV = CapturedVars.begin();
10516     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10517                                               CE = CS.capture_end();
10518          CI != CE; ++CI, ++RI, ++CV) {
10519       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10520       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10521 
10522       // VLA sizes are passed to the outlined region by copy and do not have map
10523       // information associated.
10524       if (CI->capturesVariableArrayType()) {
10525         CurInfo.Exprs.push_back(nullptr);
10526         CurInfo.BasePointers.push_back(*CV);
10527         CurInfo.Pointers.push_back(*CV);
10528         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10529             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10530         // Copy to the device as an argument. No need to retrieve it.
10531         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10532                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10533                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10534         CurInfo.Mappers.push_back(nullptr);
10535       } else {
10536         // If we have any information in the map clause, we use it, otherwise we
10537         // just do a default mapping.
10538         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10539         if (!CI->capturesThis())
10540           MappedVarSet.insert(CI->getCapturedVar());
10541         else
10542           MappedVarSet.insert(nullptr);
10543         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10544           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10545         // Generate correct mapping for variables captured by reference in
10546         // lambdas.
10547         if (CI->capturesVariable())
10548           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10549                                                   CurInfo, LambdaPointers);
10550       }
10551       // We expect to have at least an element of information for this capture.
10552       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10553              "Non-existing map pointer for capture!");
10554       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10555              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10556              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10557              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10558              "Inconsistent map information sizes!");
10559 
10560       // If there is an entry in PartialStruct it means we have a struct with
10561       // individual members mapped. Emit an extra combined entry.
10562       if (PartialStruct.Base.isValid()) {
10563         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10564         MEHandler.emitCombinedEntry(
10565             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10566             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10567       }
10568 
10569       // We need to append the results of this capture to what we already have.
10570       CombinedInfo.append(CurInfo);
10571     }
10572     // Adjust MEMBER_OF flags for the lambdas captures.
10573     MEHandler.adjustMemberOfForLambdaCaptures(
10574         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10575         CombinedInfo.Types);
10576     // Map any list items in a map clause that were not captures because they
10577     // weren't referenced within the construct.
10578     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10579 
10580     TargetDataInfo Info;
10581     // Fill up the arrays and create the arguments.
10582     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10583     emitOffloadingArraysArgument(
10584         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10585         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10586         {/*ForEndCall=*/false});
10587 
10588     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10589     InputInfo.BasePointersArray =
10590         Address(Info.BasePointersArray, CGM.getPointerAlign());
10591     InputInfo.PointersArray =
10592         Address(Info.PointersArray, CGM.getPointerAlign());
10593     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10594     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10595     MapTypesArray = Info.MapTypesArray;
10596     MapNamesArray = Info.MapNamesArray;
10597     if (RequiresOuterTask)
10598       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10599     else
10600       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10601   };
10602 
10603   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10604                              CodeGenFunction &CGF, PrePostActionTy &) {
10605     if (RequiresOuterTask) {
10606       CodeGenFunction::OMPTargetDataInfo InputInfo;
10607       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10608     } else {
10609       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10610     }
10611   };
10612 
10613   // If we have a target function ID it means that we need to support
10614   // offloading, otherwise, just execute on the host. We need to execute on host
10615   // regardless of the conditional in the if clause if, e.g., the user do not
10616   // specify target triples.
10617   if (OutlinedFnID) {
10618     if (IfCond) {
10619       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10620     } else {
10621       RegionCodeGenTy ThenRCG(TargetThenGen);
10622       ThenRCG(CGF);
10623     }
10624   } else {
10625     RegionCodeGenTy ElseRCG(TargetElseGen);
10626     ElseRCG(CGF);
10627   }
10628 }
10629 
/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device function for each one found. \p ParentName is the mangled name of
/// the enclosing function/ctor/dtor; it is combined with the region's
/// device/file/line info to identify the target region entry uniquely.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The unique entry identity is derived from the begin location of the
    // directive: device id, file id, and line number.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining directive kinds are not target execution directives, so
    // they cannot reach this switch (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, recurse into the raw associated
  // statement (if there is one) to find nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10780 
10781 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10782   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10783       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10784   if (!DevTy)
10785     return false;
10786   // Do not emit device_type(nohost) functions for the host.
10787   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10788     return true;
10789   // Do not emit device_type(host) functions for the device.
10790   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10791     return true;
10792   return false;
10793 }
10794 
10795 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10796   // If emitting code for the host, we do not process FD here. Instead we do
10797   // the normal code generation.
10798   if (!CGM.getLangOpts().OpenMPIsDevice) {
10799     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10800       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10801                                   CGM.getLangOpts().OpenMPIsDevice))
10802         return true;
10803     return false;
10804   }
10805 
10806   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10807   // Try to detect target regions in the function.
10808   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10809     StringRef Name = CGM.getMangledName(GD);
10810     scanForTargetRegionsFunctions(FD->getBody(), Name);
10811     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10812                                 CGM.getLangOpts().OpenMPIsDevice))
10813       return true;
10814   }
10815 
10816   // Do not to emit function if it is not marked as declare target.
10817   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10818          AlreadyEmittedTargetDecls.count(VD) == 0;
10819 }
10820 
/// Decide whether the global variable \p GD should be skipped by the regular
/// code generation path. Returns true when the variable must not be emitted
/// here (excluded by device_type, or its emission is deferred).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // Host compilation: nothing else to filter, emit normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables and 'to' variables under unified shared memory are
    // postponed; emitDeferredTargetDecls processes DeferredGlobalVariables.
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10858 
/// Register \p VD (with address \p Addr) in the offload entries table so the
/// host and device images agree on its entry. No-op when no target triples
/// are specified and we are not compiling for a device.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  // Plain 'to' variables (no unified shared memory): register the variable
  // itself, with its real size and linkage.
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size 0 signals that no definition exists here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Create an internal constant "<name>_ref" initialized with the
        // variable's address and mark it compiler-used so it is kept alive.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' variables, or 'to' variables under unified shared memory:
    // register a pointer-sized entry with weak linkage instead.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry keeps only the name; no host address exists.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10940 
10941 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10942   if (isa<FunctionDecl>(GD.getDecl()) ||
10943       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10944     return emitTargetFunctions(GD);
10945 
10946   return emitTargetGlobalVariable(GD);
10947 }
10948 
10949 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10950   for (const VarDecl *VD : DeferredGlobalVariables) {
10951     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10952         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10953     if (!Res)
10954       continue;
10955     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10956         !HasRequiresUnifiedSharedMemory) {
10957       CGM.EmitGlobal(VD);
10958     } else {
10959       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10960               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10961                HasRequiresUnifiedSharedMemory)) &&
10962              "Expected link clause or to clause with unified memory.");
10963       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10964     }
10965   }
10966 }
10967 
10968 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10969     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10970   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10971          " Expected target-based directive.");
10972 }
10973 
10974 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10975   for (const OMPClause *Clause : D->clauselists()) {
10976     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10977       HasRequiresUnifiedSharedMemory = true;
10978     } else if (const auto *AC =
10979                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10980       switch (AC->getAtomicDefaultMemOrderKind()) {
10981       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10982         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10983         break;
10984       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10985         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10986         break;
10987       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10988         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10989         break;
10990       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10991         break;
10992       }
10993     }
10994   }
10995 }
10996 
/// Returns the default atomic memory ordering, as set by a previously
/// processed 'requires atomic_default_mem_order' clause (see
/// processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11000 
/// Returns true if \p VD carries an 'omp allocate' attribute with a
/// predefined allocator, and sets \p AS to the language address space the
/// variable should be emitted in. Returns false when there is no such
/// attribute.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  // All predefined allocators below map to the default address space; the
  // first two fall through into the "not supported" group intentionally.
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  // Unreachable for valid enum values; kept to satisfy the compiler.
  return false;
}
11025 
/// Returns true if an 'omp requires unified_shared_memory' clause has been
/// seen (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11029 
11030 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11031     CodeGenModule &CGM)
11032     : CGM(CGM) {
11033   if (CGM.getLangOpts().OpenMPIsDevice) {
11034     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11035     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11036   }
11037 }
11038 
11039 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11040   if (CGM.getLangOpts().OpenMPIsDevice)
11041     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11042 }
11043 
/// Record \p GD as a global target function on device compilations.
/// Returns true when the function should not be emitted (already emitted, or
/// marking is disabled / host compilation); false when emission should go
/// ahead.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Already-defined llvm::Function means the body was emitted; a mere
      // declaration (or no global at all) means it still needs emission.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target: insert() records D and reports (via .second) whether
  // it was newly added, so this returns true only when D was seen before.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
11063 
/// Create the global constructor-style function that registers the 'requires'
/// flags with the offload runtime via __tgt_register_requires. Returns
/// nullptr when no registration function is needed (no target triples, simd
/// mode, device compilation, or no target entries/regions at all).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // A nullary void function named "omp_offloading.requires_reg".
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call __tgt_register_requires(<flags>) into the function body.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11105 
11106 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11107                                     const OMPExecutableDirective &D,
11108                                     SourceLocation Loc,
11109                                     llvm::Function *OutlinedFn,
11110                                     ArrayRef<llvm::Value *> CapturedVars) {
11111   if (!CGF.HaveInsertPoint())
11112     return;
11113 
11114   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11115   CodeGenFunction::RunCleanupsScope Scope(CGF);
11116 
11117   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11118   llvm::Value *Args[] = {
11119       RTLoc,
11120       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11121       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11122   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11123   RealArgs.append(std::begin(Args), std::end(Args));
11124   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11125 
11126   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11127       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11128   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11129 }
11130 
11131 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11132                                          const Expr *NumTeams,
11133                                          const Expr *ThreadLimit,
11134                                          SourceLocation Loc) {
11135   if (!CGF.HaveInsertPoint())
11136     return;
11137 
11138   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11139 
11140   llvm::Value *NumTeamsVal =
11141       NumTeams
11142           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11143                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11144           : CGF.Builder.getInt32(0);
11145 
11146   llvm::Value *ThreadLimitVal =
11147       ThreadLimit
11148           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11149                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11150           : CGF.Builder.getInt32(0);
11151 
11152   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11153   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11154                                      ThreadLimitVal};
11155   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11156                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11157                       PushNumTeamsArgs);
11158 }
11159 
11160 void CGOpenMPRuntime::emitTargetDataCalls(
11161     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11162     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11163   if (!CGF.HaveInsertPoint())
11164     return;
11165 
11166   // Action used to replace the default codegen action and turn privatization
11167   // off.
11168   PrePostActionTy NoPrivAction;
11169 
11170   // Generate the code for the opening of the data environment. Capture all the
11171   // arguments of the runtime call by reference because they are used in the
11172   // closing of the region.
11173   auto &&BeginThenGen = [this, &D, Device, &Info,
11174                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11175     // Fill up the arrays with all the mapped variables.
11176     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11177 
11178     // Get map clause information.
11179     MappableExprsHandler MEHandler(D, CGF);
11180     MEHandler.generateAllInfo(CombinedInfo);
11181 
11182     // Fill up the arrays and create the arguments.
11183     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11184                          /*IsNonContiguous=*/true);
11185 
11186     llvm::Value *BasePointersArrayArg = nullptr;
11187     llvm::Value *PointersArrayArg = nullptr;
11188     llvm::Value *SizesArrayArg = nullptr;
11189     llvm::Value *MapTypesArrayArg = nullptr;
11190     llvm::Value *MapNamesArrayArg = nullptr;
11191     llvm::Value *MappersArrayArg = nullptr;
11192     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11193                                  SizesArrayArg, MapTypesArrayArg,
11194                                  MapNamesArrayArg, MappersArrayArg, Info);
11195 
11196     // Emit device ID if any.
11197     llvm::Value *DeviceID = nullptr;
11198     if (Device) {
11199       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11200                                            CGF.Int64Ty, /*isSigned=*/true);
11201     } else {
11202       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11203     }
11204 
11205     // Emit the number of elements in the offloading arrays.
11206     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11207     //
11208     // Source location for the ident struct
11209     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11210 
11211     llvm::Value *OffloadingArgs[] = {RTLoc,
11212                                      DeviceID,
11213                                      PointerNum,
11214                                      BasePointersArrayArg,
11215                                      PointersArrayArg,
11216                                      SizesArrayArg,
11217                                      MapTypesArrayArg,
11218                                      MapNamesArrayArg,
11219                                      MappersArrayArg};
11220     CGF.EmitRuntimeCall(
11221         OMPBuilder.getOrCreateRuntimeFunction(
11222             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11223         OffloadingArgs);
11224 
11225     // If device pointer privatization is required, emit the body of the region
11226     // here. It will have to be duplicated: with and without privatization.
11227     if (!Info.CaptureDeviceAddrMap.empty())
11228       CodeGen(CGF);
11229   };
11230 
11231   // Generate code for the closing of the data region.
11232   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11233                                                 PrePostActionTy &) {
11234     assert(Info.isValid() && "Invalid data environment closing arguments.");
11235 
11236     llvm::Value *BasePointersArrayArg = nullptr;
11237     llvm::Value *PointersArrayArg = nullptr;
11238     llvm::Value *SizesArrayArg = nullptr;
11239     llvm::Value *MapTypesArrayArg = nullptr;
11240     llvm::Value *MapNamesArrayArg = nullptr;
11241     llvm::Value *MappersArrayArg = nullptr;
11242     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11243                                  SizesArrayArg, MapTypesArrayArg,
11244                                  MapNamesArrayArg, MappersArrayArg, Info,
11245                                  {/*ForEndCall=*/true});
11246 
11247     // Emit device ID if any.
11248     llvm::Value *DeviceID = nullptr;
11249     if (Device) {
11250       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11251                                            CGF.Int64Ty, /*isSigned=*/true);
11252     } else {
11253       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11254     }
11255 
11256     // Emit the number of elements in the offloading arrays.
11257     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11258 
11259     // Source location for the ident struct
11260     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11261 
11262     llvm::Value *OffloadingArgs[] = {RTLoc,
11263                                      DeviceID,
11264                                      PointerNum,
11265                                      BasePointersArrayArg,
11266                                      PointersArrayArg,
11267                                      SizesArrayArg,
11268                                      MapTypesArrayArg,
11269                                      MapNamesArrayArg,
11270                                      MappersArrayArg};
11271     CGF.EmitRuntimeCall(
11272         OMPBuilder.getOrCreateRuntimeFunction(
11273             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11274         OffloadingArgs);
11275   };
11276 
11277   // If we need device pointer privatization, we need to emit the body of the
11278   // region with no privatization in the 'else' branch of the conditional.
11279   // Otherwise, we don't have to do anything.
11280   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11281                                                          PrePostActionTy &) {
11282     if (!Info.CaptureDeviceAddrMap.empty()) {
11283       CodeGen.setAction(NoPrivAction);
11284       CodeGen(CGF);
11285     }
11286   };
11287 
11288   // We don't have to do anything to close the region if the if clause evaluates
11289   // to false.
11290   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11291 
11292   if (IfCond) {
11293     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11294   } else {
11295     RegionCodeGenTy RCG(BeginThenGen);
11296     RCG(CGF);
11297   }
11298 
11299   // If we don't require privatization of device pointers, we emit the body in
11300   // between the runtime calls. This avoids duplicating the body code.
11301   if (Info.CaptureDeviceAddrMap.empty()) {
11302     CodeGen.setAction(NoPrivAction);
11303     CodeGen(CGF);
11304   }
11305 
11306   if (IfCond) {
11307     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11308   } else {
11309     RegionCodeGenTy RCG(EndThenGen);
11310     RCG(CGF);
11311   }
11312 }
11313 
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit if the insertion point was already cleared.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below and read later by ThenGen, which may be
  // emitted inside an outer task region (depend/nowait) or inlined.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_* mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; they are listed exhaustively so that adding a new
    // OpenMP directive forces a decision here (no 'default' fall-through).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then emits ThenGen,
  // either wrapped in a task (when depend/nowait requires it) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the array addresses for ThenGen (captured by reference above).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause, the runtime call is emitted only on the 'then' path;
  // the 'else' path is a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11493 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// - LinearWithVarStride: 'linear' whose stride is the value of another
  ///   parameter; StrideOrArg holds that parameter's position.
  /// - Linear: 'linear' with a constant stride, held in StrideOrArg.
  /// - Uniform: parameter listed in a 'uniform' clause.
  /// - Vector: any other parameter (the default classification).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    ParamKindTy Kind = Vector;  // Classification; defaults to Vector.
    llvm::APSInt StrideOrArg;   // Linear step, or stride-parameter position.
    llvm::APSInt Alignment;     // Alignment from the 'aligned' clause, if any.
  };
} // namespace
11504 
11505 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11506                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11507   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11508   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11509   // of that clause. The VLEN value must be power of 2.
11510   // In other case the notion of the function`s "characteristic data type" (CDT)
11511   // is used to compute the vector length.
11512   // CDT is defined in the following order:
11513   //   a) For non-void function, the CDT is the return type.
11514   //   b) If the function has any non-uniform, non-linear parameters, then the
11515   //   CDT is the type of the first such parameter.
11516   //   c) If the CDT determined by a) or b) above is struct, union, or class
11517   //   type which is pass-by-value (except for the type that maps to the
11518   //   built-in complex data type), the characteristic data type is int.
11519   //   d) If none of the above three cases is applicable, the CDT is int.
11520   // The VLEN is then determined based on the CDT and the size of vector
11521   // register of that ISA for which current vector version is generated. The
11522   // VLEN is computed using the formula below:
11523   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11524   // where vector register size specified in section 3.2.1 Registers and the
11525   // Stack Frame of original AMD64 ABI document.
11526   QualType RetType = FD->getReturnType();
11527   if (RetType.isNull())
11528     return 0;
11529   ASTContext &C = FD->getASTContext();
11530   QualType CDT;
11531   if (!RetType.isNull() && !RetType->isVoidType()) {
11532     CDT = RetType;
11533   } else {
11534     unsigned Offset = 0;
11535     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11536       if (ParamAttrs[Offset].Kind == Vector)
11537         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11538       ++Offset;
11539     }
11540     if (CDT.isNull()) {
11541       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11542         if (ParamAttrs[I + Offset].Kind == Vector) {
11543           CDT = FD->getParamDecl(I)->getType();
11544           break;
11545         }
11546       }
11547     }
11548   }
11549   if (CDT.isNull())
11550     CDT = C.IntTy;
11551   CDT = CDT->getCanonicalTypeUnqualified();
11552   if (CDT->isRecordType() || CDT->isUnionType())
11553     CDT = C.IntTy;
11554   return C.getTypeSize(CDT);
11555 }
11556 
11557 static void
11558 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11559                            const llvm::APSInt &VLENVal,
11560                            ArrayRef<ParamAttrTy> ParamAttrs,
11561                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11562   struct ISADataTy {
11563     char ISA;
11564     unsigned VecRegSize;
11565   };
11566   ISADataTy ISAData[] = {
11567       {
11568           'b', 128
11569       }, // SSE
11570       {
11571           'c', 256
11572       }, // AVX
11573       {
11574           'd', 256
11575       }, // AVX2
11576       {
11577           'e', 512
11578       }, // AVX512
11579   };
11580   llvm::SmallVector<char, 2> Masked;
11581   switch (State) {
11582   case OMPDeclareSimdDeclAttr::BS_Undefined:
11583     Masked.push_back('N');
11584     Masked.push_back('M');
11585     break;
11586   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11587     Masked.push_back('N');
11588     break;
11589   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11590     Masked.push_back('M');
11591     break;
11592   }
11593   for (char Mask : Masked) {
11594     for (const ISADataTy &Data : ISAData) {
11595       SmallString<256> Buffer;
11596       llvm::raw_svector_ostream Out(Buffer);
11597       Out << "_ZGV" << Data.ISA << Mask;
11598       if (!VLENVal) {
11599         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11600         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11601         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11602       } else {
11603         Out << VLENVal;
11604       }
11605       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11606         switch (ParamAttr.Kind){
11607         case LinearWithVarStride:
11608           Out << 's' << ParamAttr.StrideOrArg;
11609           break;
11610         case Linear:
11611           Out << 'l';
11612           if (ParamAttr.StrideOrArg != 1)
11613             Out << ParamAttr.StrideOrArg;
11614           break;
11615         case Uniform:
11616           Out << 'u';
11617           break;
11618         case Vector:
11619           Out << 'v';
11620           break;
11621         }
11622         if (!!ParamAttr.Alignment)
11623           Out << 'a' << ParamAttr.Alignment;
11624       }
11625       Out << '_' << Fn->getName();
11626       Fn->addFnAttr(Out.str());
11627     }
11628   }
11629 }
11630 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11636 
11637 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11638 ///
11639 /// TODO: Need to implement the behavior for reference marked with a
11640 /// var or no linear modifiers (1.b in the section). For this, we
11641 /// need to extend ParamKindTy to support the linear modifiers.
11642 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11643   QT = QT.getCanonicalType();
11644 
11645   if (QT->isVoidType())
11646     return false;
11647 
11648   if (Kind == ParamKindTy::Uniform)
11649     return false;
11650 
11651   if (Kind == ParamKindTy::Linear)
11652     return false;
11653 
11654   // TODO: Handle linear references with modifiers
11655 
11656   if (Kind == ParamKindTy::LinearWithVarStride)
11657     return false;
11658 
11659   return true;
11660 }
11661 
11662 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11663 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11664   QT = QT.getCanonicalType();
11665   unsigned Size = C.getTypeSize(QT);
11666 
11667   // Only scalars and complex within 16 bytes wide set PVB to true.
11668   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11669     return false;
11670 
11671   if (QT->isFloatingType())
11672     return true;
11673 
11674   if (QT->isIntegerType())
11675     return true;
11676 
11677   if (QT->isPointerType())
11678     return true;
11679 
11680   // TODO: Add support for complex types (section 3.1.2, item 2).
11681 
11682   return false;
11683 }
11684 
11685 /// Computes the lane size (LS) of a return type or of an input parameter,
11686 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11687 /// TODO: Add support for references, section 3.2.1, item 1.
11688 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11689   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11690     QualType PTy = QT.getCanonicalType()->getPointeeType();
11691     if (getAArch64PBV(PTy, C))
11692       return C.getTypeSize(PTy);
11693   }
11694   if (getAArch64PBV(QT, C))
11695     return C.getTypeSize(QT);
11696 
11697   return C.getTypeSize(C.getUIntPtrType());
11698 }
11699 
11700 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11701 // signature of the scalar function, as defined in 3.2.2 of the
11702 // AAVFABI.
11703 static std::tuple<unsigned, unsigned, bool>
11704 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11705   QualType RetType = FD->getReturnType().getCanonicalType();
11706 
11707   ASTContext &C = FD->getASTContext();
11708 
11709   bool OutputBecomesInput = false;
11710 
11711   llvm::SmallVector<unsigned, 8> Sizes;
11712   if (!RetType->isVoidType()) {
11713     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11714     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11715       OutputBecomesInput = true;
11716   }
11717   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11718     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11719     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11720   }
11721 
11722   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11723   // The LS of a function parameter / return value can only be a power
11724   // of 2, starting from 8 bits, up to 128.
11725   assert(llvm::all_of(Sizes,
11726                       [](unsigned Size) {
11727                         return Size == 8 || Size == 16 || Size == 32 ||
11728                                Size == 64 || Size == 128;
11729                       }) &&
11730          "Invalid size");
11731 
11732   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11733                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11734                          OutputBecomesInput);
11735 }
11736 
11737 /// Mangle the parameter part of the vector function name according to
11738 /// their OpenMP classification. The mangling function is defined in
11739 /// section 3.5 of the AAVFABI.
11740 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11741   SmallString<256> Buffer;
11742   llvm::raw_svector_ostream Out(Buffer);
11743   for (const auto &ParamAttr : ParamAttrs) {
11744     switch (ParamAttr.Kind) {
11745     case LinearWithVarStride:
11746       Out << "ls" << ParamAttr.StrideOrArg;
11747       break;
11748     case Linear:
11749       Out << 'l';
11750       // Don't print the step value if it is not present or if it is
11751       // equal to 1.
11752       if (ParamAttr.StrideOrArg != 1)
11753         Out << ParamAttr.StrideOrArg;
11754       break;
11755     case Uniform:
11756       Out << 'u';
11757       break;
11758     case Vector:
11759       Out << 'v';
11760       break;
11761     }
11762 
11763     if (!!ParamAttr.Alignment)
11764       Out << 'a' << ParamAttr.Alignment;
11765   }
11766 
11767   return std::string(Out.str());
11768 }
11769 
11770 // Function used to add the attribute. The parameter `VLEN` is
11771 // templated to allow the use of "x" when targeting scalable functions
11772 // for SVE.
11773 template <typename T>
11774 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11775                                  char ISA, StringRef ParSeq,
11776                                  StringRef MangledName, bool OutputBecomesInput,
11777                                  llvm::Function *Fn) {
11778   SmallString<256> Buffer;
11779   llvm::raw_svector_ostream Out(Buffer);
11780   Out << Prefix << ISA << LMask << VLEN;
11781   if (OutputBecomesInput)
11782     Out << "v";
11783   Out << ParSeq << "_" << MangledName;
11784   Fn->addFnAttr(Out.str());
11785 }
11786 
11787 // Helper function to generate the Advanced SIMD names depending on
11788 // the value of the NDS when simdlen is not present.
11789 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11790                                       StringRef Prefix, char ISA,
11791                                       StringRef ParSeq, StringRef MangledName,
11792                                       bool OutputBecomesInput,
11793                                       llvm::Function *Fn) {
11794   switch (NDS) {
11795   case 8:
11796     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11797                          OutputBecomesInput, Fn);
11798     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11799                          OutputBecomesInput, Fn);
11800     break;
11801   case 16:
11802     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11803                          OutputBecomesInput, Fn);
11804     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11805                          OutputBecomesInput, Fn);
11806     break;
11807   case 32:
11808     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11809                          OutputBecomesInput, Fn);
11810     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11811                          OutputBecomesInput, Fn);
11812     break;
11813   case 64:
11814   case 128:
11815     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11816                          OutputBecomesInput, Fn);
11817     break;
11818   default:
11819     llvm_unreachable("Scalar type is too wide.");
11820   }
11821 }
11822 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// ISA is 'n' for Advanced SIMD or 's' for SVE; UserVLEN is the value of
/// the 'simdlen' clause (0 when absent). Invalid simdlen values are
/// diagnosed as warnings and no attribute is emitted.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both the unmasked ('N') and
        // masked ('M') variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11931 
11932 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11933                                               llvm::Function *Fn) {
11934   ASTContext &C = CGM.getContext();
11935   FD = FD->getMostRecentDecl();
11936   // Map params to their positions in function decl.
11937   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11938   if (isa<CXXMethodDecl>(FD))
11939     ParamPositions.try_emplace(FD, 0);
11940   unsigned ParamPos = ParamPositions.size();
11941   for (const ParmVarDecl *P : FD->parameters()) {
11942     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11943     ++ParamPos;
11944   }
11945   while (FD) {
11946     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11947       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11948       // Mark uniform parameters.
11949       for (const Expr *E : Attr->uniforms()) {
11950         E = E->IgnoreParenImpCasts();
11951         unsigned Pos;
11952         if (isa<CXXThisExpr>(E)) {
11953           Pos = ParamPositions[FD];
11954         } else {
11955           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11956                                 ->getCanonicalDecl();
11957           Pos = ParamPositions[PVD];
11958         }
11959         ParamAttrs[Pos].Kind = Uniform;
11960       }
11961       // Get alignment info.
11962       auto NI = Attr->alignments_begin();
11963       for (const Expr *E : Attr->aligneds()) {
11964         E = E->IgnoreParenImpCasts();
11965         unsigned Pos;
11966         QualType ParmTy;
11967         if (isa<CXXThisExpr>(E)) {
11968           Pos = ParamPositions[FD];
11969           ParmTy = E->getType();
11970         } else {
11971           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11972                                 ->getCanonicalDecl();
11973           Pos = ParamPositions[PVD];
11974           ParmTy = PVD->getType();
11975         }
11976         ParamAttrs[Pos].Alignment =
11977             (*NI)
11978                 ? (*NI)->EvaluateKnownConstInt(C)
11979                 : llvm::APSInt::getUnsigned(
11980                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11981                           .getQuantity());
11982         ++NI;
11983       }
11984       // Mark linear parameters.
11985       auto SI = Attr->steps_begin();
11986       auto MI = Attr->modifiers_begin();
11987       for (const Expr *E : Attr->linears()) {
11988         E = E->IgnoreParenImpCasts();
11989         unsigned Pos;
11990         // Rescaling factor needed to compute the linear parameter
11991         // value in the mangled name.
11992         unsigned PtrRescalingFactor = 1;
11993         if (isa<CXXThisExpr>(E)) {
11994           Pos = ParamPositions[FD];
11995         } else {
11996           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11997                                 ->getCanonicalDecl();
11998           Pos = ParamPositions[PVD];
11999           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12000             PtrRescalingFactor = CGM.getContext()
12001                                      .getTypeSizeInChars(P->getPointeeType())
12002                                      .getQuantity();
12003         }
12004         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12005         ParamAttr.Kind = Linear;
12006         // Assuming a stride of 1, for `linear` without modifiers.
12007         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12008         if (*SI) {
12009           Expr::EvalResult Result;
12010           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12011             if (const auto *DRE =
12012                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12013               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12014                 ParamAttr.Kind = LinearWithVarStride;
12015                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12016                     ParamPositions[StridePVD->getCanonicalDecl()]);
12017               }
12018             }
12019           } else {
12020             ParamAttr.StrideOrArg = Result.Val.getInt();
12021           }
12022         }
12023         // If we are using a linear clause on a pointer, we need to
12024         // rescale the value of linear_step with the byte size of the
12025         // pointee type.
12026         if (Linear == ParamAttr.Kind)
12027           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12028         ++SI;
12029         ++MI;
12030       }
12031       llvm::APSInt VLENVal;
12032       SourceLocation ExprLoc;
12033       const Expr *VLENExpr = Attr->getSimdlen();
12034       if (VLENExpr) {
12035         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12036         ExprLoc = VLENExpr->getExprLoc();
12037       }
12038       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12039       if (CGM.getTriple().isX86()) {
12040         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12041       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12042         unsigned VLEN = VLENVal.getExtValue();
12043         StringRef MangledName = Fn->getName();
12044         if (CGM.getTarget().hasFeature("sve"))
12045           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12046                                          MangledName, 's', 128, Fn, ExprLoc);
12047         if (CGM.getTarget().hasFeature("neon"))
12048           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12049                                          MangledName, 'n', 128, Fn, ExprLoc);
12050       }
12051     }
12052     FD = FD->getPreviousDecl();
12053   }
12054 }
12055 
12056 namespace {
12057 /// Cleanup action for doacross support.
12058 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12059 public:
12060   static const int DoacrossFinArgs = 2;
12061 
12062 private:
12063   llvm::FunctionCallee RTLFn;
12064   llvm::Value *Args[DoacrossFinArgs];
12065 
12066 public:
12067   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12068                     ArrayRef<llvm::Value *> CallArgs)
12069       : RTLFn(RTLFn) {
12070     assert(CallArgs.size() == DoacrossFinArgs);
12071     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12072   }
12073   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12074     if (!CGF.HaveInsertPoint())
12075       return;
12076     CGF.EmitRuntimeCall(RTLFn, Args);
12077   }
12078 };
12079 } // namespace
12080 
// Emits doacross-loop initialization: builds an array of kmp_dim descriptors
// (one per collapsed loop), fills in the upper bound and stride of each
// dimension, calls __kmpc_doacross_init, and pushes a cleanup that emits the
// matching __kmpc_doacross_fini on region exit (normal and EH paths).
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // kmp_dim was already built by an earlier doacross directive; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the 'lo' field stays 0 for every dim.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run when the region is left.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12151 
12152 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12153                                           const OMPDependClause *C) {
12154   QualType Int64Ty =
12155       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12156   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12157   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12158       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12159   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12160   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12161     const Expr *CounterVal = C->getLoopData(I);
12162     assert(CounterVal);
12163     llvm::Value *CntVal = CGF.EmitScalarConversion(
12164         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12165         CounterVal->getExprLoc());
12166     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12167                           /*Volatile=*/false, Int64Ty);
12168   }
12169   llvm::Value *Args[] = {
12170       emitUpdateLocation(CGF, C->getBeginLoc()),
12171       getThreadID(CGF, C->getBeginLoc()),
12172       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12173   llvm::FunctionCallee RTLFn;
12174   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12175     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12176                                                   OMPRTL___kmpc_doacross_post);
12177   } else {
12178     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12179     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12180                                                   OMPRTL___kmpc_doacross_wait);
12181   }
12182   CGF.EmitRuntimeCall(RTLFn, Args);
12183 }
12184 
12185 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12186                                llvm::FunctionCallee Callee,
12187                                ArrayRef<llvm::Value *> Args) const {
12188   assert(Loc.isValid() && "Outlined function call location must be valid.");
12189   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12190 
12191   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12192     if (Fn->doesNotThrow()) {
12193       CGF.EmitNounwindRuntimeCall(Fn, Args);
12194       return;
12195     }
12196   }
12197   CGF.EmitRuntimeCall(Callee, Args);
12198 }
12199 
// Thin wrapper over emitCall for invoking an outlined OpenMP region
// function. Kept as a separate entry point so target-specific runtimes can
// customize how outlined functions are called.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12205 
12206 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12207   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12208     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12209       HasEmittedDeclareTargetRegion = true;
12210 }
12211 
// Returns the address of a captured parameter. The base implementation does
// no native<->target parameter translation, so TargetParam is intentionally
// unused here; device runtimes are expected to override this behavior.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12217 
12218 /// Return allocator value from expression, or return a null allocator (default
12219 /// when no allocator specified).
12220 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12221                                     const Expr *Allocator) {
12222   llvm::Value *AllocVal;
12223   if (Allocator) {
12224     AllocVal = CGF.EmitScalarExpr(Allocator);
12225     // According to the standard, the original allocator type is a enum
12226     // (integer). Convert to pointer type, if required.
12227     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12228                                         CGF.getContext().VoidPtrTy,
12229                                         Allocator->getExprLoc());
12230   } else {
12231     // If no allocator specified, it defaults to the null allocator.
12232     AllocVal = llvm::Constant::getNullValue(
12233         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12234   }
12235   return AllocVal;
12236 }
12237 
// Returns the address to use for local variable \p VD. Handles two special
// cases that may compose: (1) variables tracked by an enclosing untied task
// (their storage lives in the task frame), and (2) variables with an
// 'omp allocate' attribute, which are heap-allocated through
// __kmpc_alloc/__kmpc_aligned_alloc and freed via a __kmpc_free cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up untied-task storage for this variable, if the current function
  // has a registered untied-task frame.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size in bytes, rounded up to the declared
    // alignment. VLA-typed decls need a runtime size computation.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // An explicit align() modifier selects the aligned allocation entry
    // point, which takes the alignment as an extra size_t argument.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the heap pointer into the task frame slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      // Emits __kmpc_free(gtid, ptr, allocator) at scope exit.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    // Untied tasks may resume in a different part; emit the switch point.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12339 
12340 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12341                                              const VarDecl *VD) const {
12342   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12343   if (It == FunctionToUntiedTaskStackMap.end())
12344     return false;
12345   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12346 }
12347 
12348 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12349     CodeGenModule &CGM, const OMPLoopDirective &S)
12350     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12351   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12352   if (!NeedToPush)
12353     return;
12354   NontemporalDeclsSet &DS =
12355       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12356   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12357     for (const Stmt *Ref : C->private_refs()) {
12358       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12359       const ValueDecl *VD;
12360       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12361         VD = DRE->getDecl();
12362       } else {
12363         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12364         assert((ME->isImplicitCXXThis() ||
12365                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12366                "Expected member of current class.");
12367         VD = ME->getMemberDecl();
12368       }
12369       DS.insert(VD);
12370     }
12371   }
12372 }
12373 
12374 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12375   if (!NeedToPush)
12376     return;
12377   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12378 }
12379 
12380 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12381     CodeGenFunction &CGF,
12382     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12383                           std::pair<Address, Address>> &LocalVars)
12384     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12385   if (!NeedToPush)
12386     return;
12387   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12388       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12389   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12390 }
12391 
12392 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12393   if (!NeedToPush)
12394     return;
12395   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12396 }
12397 
12398 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12399   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12400 
12401   return llvm::any_of(
12402       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12403       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12404 }
12405 
12406 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12407     const OMPExecutableDirective &S,
12408     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12409     const {
12410   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12411   // Vars in target/task regions must be excluded completely.
12412   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12413       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12414     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12415     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12416     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12417     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12418       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12419         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12420     }
12421   }
12422   // Exclude vars in private clauses.
12423   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12424     for (const Expr *Ref : C->varlists()) {
12425       if (!Ref->getType()->isScalarType())
12426         continue;
12427       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12428       if (!DRE)
12429         continue;
12430       NeedToCheckForLPCs.insert(DRE->getDecl());
12431     }
12432   }
12433   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12434     for (const Expr *Ref : C->varlists()) {
12435       if (!Ref->getType()->isScalarType())
12436         continue;
12437       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12438       if (!DRE)
12439         continue;
12440       NeedToCheckForLPCs.insert(DRE->getDecl());
12441     }
12442   }
12443   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12444     for (const Expr *Ref : C->varlists()) {
12445       if (!Ref->getType()->isScalarType())
12446         continue;
12447       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12448       if (!DRE)
12449         continue;
12450       NeedToCheckForLPCs.insert(DRE->getDecl());
12451     }
12452   }
12453   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12454     for (const Expr *Ref : C->varlists()) {
12455       if (!Ref->getType()->isScalarType())
12456         continue;
12457       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12458       if (!DRE)
12459         continue;
12460       NeedToCheckForLPCs.insert(DRE->getDecl());
12461     }
12462   }
12463   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12464     for (const Expr *Ref : C->varlists()) {
12465       if (!Ref->getType()->isScalarType())
12466         continue;
12467       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12468       if (!DRE)
12469         continue;
12470       NeedToCheckForLPCs.insert(DRE->getDecl());
12471     }
12472   }
12473   for (const Decl *VD : NeedToCheckForLPCs) {
12474     for (const LastprivateConditionalData &Data :
12475          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12476       if (Data.DeclToUniqueName.count(VD) > 0) {
12477         if (!Data.Disabled)
12478           NeedToAddForLPCsAsDisabled.insert(VD);
12479         break;
12480       }
12481     }
12482   }
12483 }
12484 
// Pushes a lastprivate-conditional frame for directive \p S when OpenMP >=
// 5.0 and the directive carries at least one lastprivate(conditional:)
// clause; otherwise records DoNotPush so the destructor does nothing.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate decl to the unique name used for its
    // internal "last value" global variable.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12516 
12517 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12518     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12519     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12520   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12521   if (CGM.getLangOpts().OpenMP < 50)
12522     return;
12523   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12524   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12525   if (!NeedToAddForLPCsAsDisabled.empty()) {
12526     Action = ActionToDo::DisableLastprivateConditional;
12527     LastprivateConditionalData &Data =
12528         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12529     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12530       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12531     Data.Fn = CGF.CurFn;
12532     Data.Disabled = true;
12533   }
12534 }
12535 
// Named factory for the constructor overload above: builds an RAII object
// that disables lastprivate-conditional analysis for directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12541 
12542 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12543   if (CGM.getLangOpts().OpenMP < 50)
12544     return;
12545   if (Action == ActionToDo::DisableLastprivateConditional) {
12546     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12547            "Expected list of disabled private vars.");
12548     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12549   }
12550   if (Action == ActionToDo::PushAsLastprivateConditional) {
12551     assert(
12552         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12553         "Expected list of lastprivate conditional vars.");
12554     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12555   }
12556 }
12557 
// Allocates (or reuses) the per-function private storage for a lastprivate
// conditional variable \p VD: an implicit record with the variable's value
// plus a char "Fired" flag. Resets the flag to 0 and returns the address of
// the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, value field, fired field, base lvalue).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the record and a stack temporary.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the previously created storage description.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; the flag is set when the variable is actually assigned.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12592 
12593 namespace {
12594 /// Checks if the lastprivate conditional variable is referenced in LHS.
12595 class LastprivateConditionalRefChecker final
12596     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12597   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12598   const Expr *FoundE = nullptr;
12599   const Decl *FoundD = nullptr;
12600   StringRef UniqueDeclName;
12601   LValue IVLVal;
12602   llvm::Function *FoundFn = nullptr;
12603   SourceLocation Loc;
12604 
12605 public:
12606   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12607     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12608          llvm::reverse(LPM)) {
12609       auto It = D.DeclToUniqueName.find(E->getDecl());
12610       if (It == D.DeclToUniqueName.end())
12611         continue;
12612       if (D.Disabled)
12613         return false;
12614       FoundE = E;
12615       FoundD = E->getDecl()->getCanonicalDecl();
12616       UniqueDeclName = It->second;
12617       IVLVal = D.IVLVal;
12618       FoundFn = D.Fn;
12619       break;
12620     }
12621     return FoundE == E;
12622   }
12623   bool VisitMemberExpr(const MemberExpr *E) {
12624     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12625       return false;
12626     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12627          llvm::reverse(LPM)) {
12628       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12629       if (It == D.DeclToUniqueName.end())
12630         continue;
12631       if (D.Disabled)
12632         return false;
12633       FoundE = E;
12634       FoundD = E->getMemberDecl()->getCanonicalDecl();
12635       UniqueDeclName = It->second;
12636       IVLVal = D.IVLVal;
12637       FoundFn = D.Fn;
12638       break;
12639     }
12640     return FoundE == E;
12641   }
12642   bool VisitStmt(const Stmt *S) {
12643     for (const Stmt *Child : S->children()) {
12644       if (!Child)
12645         continue;
12646       if (const auto *E = dyn_cast<Expr>(Child))
12647         if (!E->isGLValue())
12648           continue;
12649       if (Visit(Child))
12650         return true;
12651     }
12652     return false;
12653   }
12654   explicit LastprivateConditionalRefChecker(
12655       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12656       : LPM(LPM) {}
12657   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12658   getFoundData() const {
12659     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12660   }
12661 };
12662 } // namespace
12663 
// Emits the conditional update of the global "last value" copy of a
// lastprivate conditional variable: inside a critical region named by
// UniqueDeclName (or unguarded under -fopenmp-simd), compares the stored
// loop IV against the current one and, when not newer, stores the current IV
// and the private value into the internal globals.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick signed vs unsigned compare based on the IV's declared type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12749 
12750 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12751                                                          const Expr *LHS) {
12752   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12753     return;
12754   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12755   if (!Checker.Visit(LHS))
12756     return;
12757   const Expr *FoundE;
12758   const Decl *FoundD;
12759   StringRef UniqueDeclName;
12760   LValue IVLVal;
12761   llvm::Function *FoundFn;
12762   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12763       Checker.getFoundData();
12764   if (FoundFn != CGF.CurFn) {
12765     // Special codegen for inner parallel regions.
12766     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12767     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12768     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12769            "Lastprivate conditional is not found in outer region.");
12770     QualType StructTy = std::get<0>(It->getSecond());
12771     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12772     LValue PrivLVal = CGF.EmitLValue(FoundE);
12773     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12774         PrivLVal.getAddress(CGF),
12775         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12776     LValue BaseLVal =
12777         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12778     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12779     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12780                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12781                         FiredLVal, llvm::AtomicOrdering::Unordered,
12782                         /*IsVolatile=*/true, /*isInit=*/false);
12783     return;
12784   }
12785 
12786   // Private address of the lastprivate conditional in the current context.
12787   // priv_a
12788   LValue LVal = CGF.EmitLValue(FoundE);
12789   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12790                                    FoundE->getExprLoc());
12791 }
12792 
12793 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12794     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12795     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12796   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12797     return;
12798   auto Range = llvm::reverse(LastprivateConditionalStack);
12799   auto It = llvm::find_if(
12800       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12801   if (It == Range.end() || It->Fn != CGF.CurFn)
12802     return;
12803   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12804   assert(LPCI != LastprivateConditionalToTypes.end() &&
12805          "Lastprivates must be registered already.");
12806   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12807   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12808   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12809   for (const auto &Pair : It->DeclToUniqueName) {
12810     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12811     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12812       continue;
12813     auto I = LPCI->getSecond().find(Pair.first);
12814     assert(I != LPCI->getSecond().end() &&
12815            "Lastprivate must be rehistered already.");
12816     // bool Cmp = priv_a.Fired != 0;
12817     LValue BaseLVal = std::get<3>(I->getSecond());
12818     LValue FiredLVal =
12819         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12820     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12821     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12822     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12823     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12824     // if (Cmp) {
12825     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12826     CGF.EmitBlock(ThenBB);
12827     Address Addr = CGF.GetAddrOfLocalVar(VD);
12828     LValue LVal;
12829     if (VD->getType()->isReferenceType())
12830       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12831                                            AlignmentSource::Decl);
12832     else
12833       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12834                                 AlignmentSource::Decl);
12835     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12836                                      D.getBeginLoc());
12837     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12838     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12839     // }
12840   }
12841 }
12842 
// Copies the final value of a lastprivate(conditional) variable from the
// internal global "last value" slot (if it was ever created, i.e. the
// variable was actually updated in the region) into the private copy
// \p PrivLVal at the end of the region.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12862 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12868 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12874 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12882 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12891 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12898 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12904 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12911 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12916 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12922 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12930 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12937 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12945 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12952 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12958 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12971 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12977 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12985 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12991 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12997 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13004 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13010 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13015 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13022 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13031 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13038 
// In SIMD-only mode only simple (serial) reductions are possible; delegate
// to the base CGOpenMPRuntime implementation for those.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13047 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13053 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13059 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13066 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13079 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13085 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13091 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13098 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13108 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13112 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13116 
// In SIMD-only mode no global is handled by the OpenMP target machinery;
// always let normal codegen emit it.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13120 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13128 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13135 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13141 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13147 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13153 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13158 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13164 
// Unreachable in SIMD-only (-fopenmp-simd) mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13171