xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build a region bound to a captured statement (outlined constructs).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build a region without a captured statement (inlined constructs).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op in the base class,
  /// overridden by task regions and forwarded by inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return the HasCancel flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function; provided by the caller.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the resume machinery for 'untied' tasks:
  /// a switch over the stored part id dispatches to the point where the task
  /// previously suspended, and each switching point adds a new switch case.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note the constructor receives the *Tied* flag
    /// and stores its inverse).
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence emitted at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id; grows one case per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Dispatch on the loaded part id; the default target is a 'done'
        // block that simply returns through any active cleanups.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes execution at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the id of the next part, run the
    /// untied codegen sequence, return from the outlined function, and add a
    /// new switch case so that a later re-invocation resumes right here.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id equals the current number of switch cases.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward switching-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info, if any, since an inlined region has no captured statement of its
/// own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other delegating members, this queries
  /// getOldCSI() (the raw CGCapturedStmtInfo) rather than OuterRegionInfo,
  /// so it also works when the outer info is not an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
343 
/// Placeholder codegen callback that must never actually be invoked; used by
/// CGOpenMPInnerExprInfo below, which exists only to privatize captures.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only non-local declarations
      // need to be remapped through the private scope.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state of the enclosing function; swapped out
  // in the constructor and restored in the destructor when NoInheritance is
  // set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true (the default), lambda/block capture info is
  /// stashed away for the lifetime of this RAII so the inlined region does
  /// not see it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Intentionally shares the 0x40 value with OMP_IDENT_BARRIER_IMPL.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
479 namespace {
480 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
497 
/// Reserved device id values understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
503 } // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is run when this cleanup fires; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the exit part of the action, but only if the IR builder still has a
  /// valid insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of \p Private for a user-defined reduction: either by
/// invoking the UDR's initializer function (when \p DRD has an initializer),
/// or by copying a null constant of type \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The init call's two arguments reference the variables that must be
    // remapped to the private copy and the original value, respectively.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the UDR pair (the
    // initializer) and emit the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a private global holding a null constant of
    // the reduction type and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly via an lvalue opaque mapping.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer using \p DRD; otherwise \p Init is
/// emitted into each element directly.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' declaration used for reduction, may be
/// null.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-sized arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (if any) and destination element pointers
  // across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
/// Emits the lvalue for the shared (original) copy of a reduction item by
/// delegating to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Computes the size (in chars and, for VLAs, in elements) of the N-th
/// reduction item and records it in Sizes. For variably modified private
/// types, the VLA size expression is bound to the computed element count so
/// that the type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: only the byte size is needed; no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1, bytes = count * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: byte size is known; derive the element count from it.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
866                                          llvm::Value *Size) {
867   const auto *PrivateVD =
868       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869   QualType PrivateType = PrivateVD->getType();
870   if (!PrivateType->isVariablyModifiedType()) {
871     assert(!Size && !Sizes[N].second &&
872            "Size should be nullptr for non-variably modified reduction "
873            "items.");
874     return;
875   }
876   CodeGenFunction::OpaqueValueMapping OpaqueMap(
877       CGF,
878       cast<OpaqueValueExpr>(
879           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880       RValue::get(Size));
881   CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
/// Emits the initial value for the N-th private reduction copy. Three cases:
/// aggregate (array) types, user-defined reductions with a usable
/// initializer, and plain privates falling back to their own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both the private and the shared addresses to their expected memory
  // types before initializing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array: element-wise initialization, optionally via the user-defined
    // reduction initializer.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer when DefaultInit
    // did not emit one and the initializer is non-trivial.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Walks the pointer/reference indirections of \p BaseTy starting at
/// \p BaseLV, loading through each level until the pointee type matches
/// \p ElTy, and returns the resulting lvalue cast to \p ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: rebuild an lvalue of the reference type, then load
      // through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the final address to ElTy's memory representation, preserving the
  // original lvalue's base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Rebuilds the chain of pointer indirections described by \p BaseTy around
/// the raw address \p Addr: one memory temporary is created per indirection
/// level and the levels are linked via stores, so the returned address can be
/// used where an lvalue of the original base type is expected.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Innermost temporary created so far.
  Address TopTmp = Address::invalid();     // Previous level awaiting a store.
  Address MostTopTmp = Address::invalid(); // Outermost temporary (the result).
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection level was needed; wrap the raw pointer with the original
  // base alignment.
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// When the N-th reduction item is an array section/subscript, the private
/// copy corresponds only to the selected part of the base variable. Computes
/// the offset of the shared lvalue relative to the base declaration and
/// applies the same adjustment to \p PrivateAddr so that expressions written
/// against the base index correctly into the private copy.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through any pointer/reference levels down to the element type of
    // the shared lvalue.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the base from the start of the shared section.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the indirection structure of the
    // original base type.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
/// Emits the body of an OpenMP region, bracketing it with a terminate scope
/// so the single-entry/single-exit contract of a structured block holds.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is represented as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Load previously recorded offload entry metadata, if any is present.
  loadOffloadInfoMetadata();
}
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emits an internal helper function for a user-defined reduction: either the
/// combiner "void .omp_combiner.(Ty *in, Ty *out)" or the initializer
/// "void .omp_initializer.(Ty *orig, Ty *priv)", depending on \p IsCombiner.
/// The declare-reduction's omp_in/omp_out (or omp_orig/omp_priv) variables
/// \p In and \p Out are privatized to the dereferenced parameters so the
/// combiner/initializer expression can be emitted unchanged.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Force inlining of this small helper under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For direct/copy-init style initializers (CombinerInitializer == nullptr),
  // emit Out's own initializer into the privatized Out storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emits the combiner (and optional initializer) functions for a declare
/// reduction directive \p D, caching the result in UDRMap. Each declaration
/// is emitted at most once.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression through; for
    // other styles the priv variable's own initializer is used inside
    // emitCombinerOrInitializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Track which function triggered the emission so the entry can be
    // cleaned up when that function is finalized.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for directive \p Kind onto the
  /// OpenMPIRBuilder's stack (if a builder is in use); the destructor pops it.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the finalization callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
/// Outlines the captured statement \p CS of a parallel/teams directive into a
/// function. \p ThreadIDVar is the captured kmp_int32 * thread-id parameter;
/// cancellation support is enabled when the enclosing directive can contain a
/// matching 'cancel'.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive kind carries a 'cancel' construct; only
  // these parallel-family directives expose hasCancel().
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Outlines the task/taskloop region of \p D. For untied tasks, installs a
/// codegen action that re-enqueues the task via __kmpc_omp_task at each part
/// boundary and reports the resulting number of task parts in
/// \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits "__kmpc_omp_task(loc, tid, task_t*)" to re-schedule the remainder
  // of an untied task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these task-family directives expose hasCancel().
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
/// Creates a dummy instruction (a no-op bitcast of undef to i32, named
/// "svcpt") that serves as a stable insertion point for location/thread-id
/// service calls in CGF.CurFn, and records it in OpenMPLocThreadIDMap.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Anchor at the end of the builder's current block.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Anchor right after the function's alloca insertion point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr =
1452         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
// Returns the OpenMP thread id (kmp_int32) for the current function,
// reusing a cached value or outlined-region parameter when possible and
// otherwise emitting a __kmpc_global_thread_num call at the service
// insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // NOTE(review): the load of the parameter is only emitted when either
      // no EH landing pad is required, or the parameter's address and the
      // current insertion point sit in the entry block (or the same block),
      // presumably so the load dominates its uses — confirm against the
      // original commit rationale.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (entry block) so the
  // cached value is available throughout the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
// Returns the LLVM pointer type for ident_t, the runtime's source-location
// descriptor struct, as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564                                              bool IsGPUDistribute) {
1565   assert((IVSize == 32 || IVSize == 64) &&
1566          "IV size is not compatible with the omp runtime");
1567   StringRef Name;
1568   if (IsGPUDistribute)
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570                                     : "__kmpc_distribute_static_init_4u")
1571                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1572                                     : "__kmpc_distribute_static_init_8u");
1573   else
1574     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575                                     : "__kmpc_for_static_init_4u")
1576                         : (IVSigned ? "__kmpc_for_static_init_8"
1577                                     : "__kmpc_for_static_init_8u");
1578 
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1581   llvm::Type *TypeParams[] = {
1582     getIdentTyPointerTy(),                     // loc
1583     CGM.Int32Ty,                               // tid
1584     CGM.Int32Ty,                               // schedtype
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy,                                     // p_stride
1589     ITy,                                       // incr
1590     ITy                                        // chunk
1591   };
1592   auto *FnTy =
1593       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594   return CGM.CreateRuntimeFunction(FnTy, Name);
1595 }
1596 
1597 llvm::FunctionCallee
1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1599   assert((IVSize == 32 || IVSize == 64) &&
1600          "IV size is not compatible with the omp runtime");
1601   StringRef Name =
1602       IVSize == 32
1603           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1604           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1605   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1606   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1607                                CGM.Int32Ty,           // tid
1608                                CGM.Int32Ty,           // schedtype
1609                                ITy,                   // lower
1610                                ITy,                   // upper
1611                                ITy,                   // stride
1612                                ITy                    // chunk
1613   };
1614   auto *FnTy =
1615       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616   return CGM.CreateRuntimeFunction(FnTy, Name);
1617 }
1618 
1619 llvm::FunctionCallee
1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1621   assert((IVSize == 32 || IVSize == 64) &&
1622          "IV size is not compatible with the omp runtime");
1623   StringRef Name =
1624       IVSize == 32
1625           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1626           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1627   llvm::Type *TypeParams[] = {
1628       getIdentTyPointerTy(), // loc
1629       CGM.Int32Ty,           // tid
1630   };
1631   auto *FnTy =
1632       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1633   return CGM.CreateRuntimeFunction(FnTy, Name);
1634 }
1635 
1636 llvm::FunctionCallee
1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   StringRef Name =
1641       IVSize == 32
1642           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1643           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1644   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1650     PtrTy,                                     // p_lower
1651     PtrTy,                                     // p_upper
1652     PtrTy                                      // p_stride
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 /// Obtain information that uniquely identifies a target entry. This
1660 /// consists of the file and device IDs as well as line number associated with
1661 /// the relevant entry source location.
1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1663                                      unsigned &DeviceID, unsigned &FileID,
1664                                      unsigned &LineNum) {
1665   SourceManager &SM = C.getSourceManager();
1666 
1667   // The loc should be always valid and have a file ID (the user cannot use
1668   // #pragma directives in macros)
1669 
1670   assert(Loc.isValid() && "Source location is expected to be always valid.");
1671 
1672   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1673   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674 
1675   llvm::sys::fs::UniqueID ID;
1676   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1677     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1678     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1679     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1680       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1681           << PLoc.getFilename() << EC.message();
1682   }
1683 
1684   DeviceID = ID.getDevice();
1685   FileID = ID.getFile();
1686   LineNum = PLoc.getLine();
1687 }
1688 
// Returns the address of the indirection pointer ("_decl_tgt_ref_ptr")
// through which a declare-target 'link' variable (or a 'to' variable under
// unified shared memory) is accessed; returns an invalid Address when no
// indirection is needed.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Name the reference pointer after the variable's mangled name; for
    // internal-linkage variables mix in the file ID to keep the name unique
    // across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Create the pointer lazily on first use and register it so the
      // offload machinery knows about the variable.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address;
      // on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1727 
// Returns (creating on first use) the per-variable cache slot (void**) that
// __kmpc_threadprivate_cached uses to memoize per-thread copies of VD.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // The runtime cache is only used when native TLS is disabled or
  // unsupported on the target.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
1737 
1738 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1739                                                 const VarDecl *VD,
1740                                                 Address VDAddr,
1741                                                 SourceLocation Loc) {
1742   if (CGM.getLangOpts().OpenMPUseTLS &&
1743       CGM.getContext().getTargetInfo().isTLSSupported())
1744     return VDAddr;
1745 
1746   llvm::Type *VarTy = VDAddr.getElementType();
1747   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1748                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1749                                                        CGM.Int8PtrTy),
1750                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1751                          getOrCreateThreadPrivateCache(VD)};
1752   return Address(CGF.EmitRuntimeCall(
1753                      OMPBuilder.getOrCreateRuntimeFunction(
1754                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1755                      Args),
1756                  VDAddr.getAlignment());
1757 }
1758 
1759 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1760     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1761     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1762   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1763   // library.
1764   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1765   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1766                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1767                       OMPLoc);
1768   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1769   // to register constructor/destructor for variable.
1770   llvm::Value *Args[] = {
1771       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1772       Ctor, CopyCtor, Dtor};
1773   CGF.EmitRuntimeCall(
1774       OMPBuilder.getOrCreateRuntimeFunction(
1775           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1776       Args);
1777 }
1778 
// Emits the constructor/destructor functions for a threadprivate variable
// and registers them with the runtime. Returns the synthesized init
// function when no CodeGenFunction is supplied (so the caller can schedule
// it as a global initializer), nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when native TLS handles the threadprivate semantics.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit once per variable definition (tracked by mangled name).
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the address of the per-thread copy as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Reinterpret the incoming void* as a pointer to the variable's type
      // and run the initializer into it.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor receives the address of the per-thread copy as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a fresh
      // "__omp_threadprivate_init_" function and return it for the caller
      // to schedule as a global initializer.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline in the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1898 
// Emits (on the device) or stubs out (on the host) the ctor/dtor functions
// for a declare-target variable and registers them as offload entries.
// Returns true when the definition itself must not be emitted (device
// compilation), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do if no offload targets are configured and this is a host
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed
  // through an indirection pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Only handle each definition once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; the offload machinery references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed to anchor the
      // offload entry; the real work happens on the device.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; the offload machinery references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2013 
// Returns a thread-private address for a compiler-generated ("artificial")
// variable of type \p VarType identified by \p Name, either as a real TLS
// global or via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // When the target supports TLS, simply mark the global thread-local and
  // return its address directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime cache, using a dedicated per-variable
  // cache slot (see getOrCreateThreadPrivateCache for the pattern).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns void*; cast back to a pointer to the variable type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2044 
2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046                                    const RegionCodeGenTy &ThenGen,
2047                                    const RegionCodeGenTy &ElseGen) {
2048   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049 
2050   // If the condition constant folds and can be elided, try to avoid emitting
2051   // the condition and the dead arm of the if/else.
2052   bool CondConstant;
2053   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054     if (CondConstant)
2055       ThenGen(CGF);
2056     else
2057       ElseGen(CGF);
2058     return;
2059   }
2060 
2061   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2062   // emit the conditional branch.
2063   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067 
2068   // Emit the 'then' code.
2069   CGF.EmitBlock(ThenBlock);
2070   ThenGen(CGF);
2071   CGF.EmitBranch(ContBlock);
2072   // Emit the 'else' code if present.
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit line number for unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083 
/// Emit code for a '#pragma omp parallel' region call.
///
/// Without an if-clause (or when it is known true) this lowers to a single
///   __kmpc_fork_call(loc, <nargs>, microtask, var1, ..., varn);
/// With an if-clause, the else arm executes the region serialized on the
/// current thread, bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function to the runtime, which invokes
  // it on each thread of the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the region inline on the current thread between the
  // (end_)serialized_parallel runtime notifications.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // if-clause present: branch between the forked and serialized forms.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190 
2191   return Elem.second = new llvm::GlobalVariable(
2192              CGM.getModule(), Ty, /*IsConstant*/ false,
2193              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194              Elem.first(), /*InsertBefore=*/nullptr,
2195              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
2206 class CommonActionTy final : public PrePostActionTy {
2207   llvm::FunctionCallee EnterCallee;
2208   ArrayRef<llvm::Value *> EnterArgs;
2209   llvm::FunctionCallee ExitCallee;
2210   ArrayRef<llvm::Value *> ExitArgs;
2211   bool Conditional;
2212   llvm::BasicBlock *ContBlock = nullptr;
2213 
2214 public:
2215   CommonActionTy(llvm::FunctionCallee EnterCallee,
2216                  ArrayRef<llvm::Value *> EnterArgs,
2217                  llvm::FunctionCallee ExitCallee,
2218                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2219       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2220         ExitArgs(ExitArgs), Conditional(Conditional) {}
2221   void Enter(CodeGenFunction &CGF) override {
2222     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2223     if (Conditional) {
2224       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2225       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2226       ContBlock = CGF.createBasicBlock("omp_if.end");
2227       // Generate the branch (If-stmt)
2228       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2229       CGF.EmitBlock(ThenBlock);
2230     }
2231   }
2232   void Done(CodeGenFunction &CGF) {
2233     // Emit the rest of blocks/branches
2234     CGF.EmitBranch(ContBlock);
2235     CGF.EmitBlock(ContBlock, true);
2236   }
2237   void Exit(CodeGenFunction &CGF) override {
2238     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2239   }
2240 };
2241 } // anonymous namespace
2242 
2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2244                                          StringRef CriticalName,
2245                                          const RegionCodeGenTy &CriticalOpGen,
2246                                          SourceLocation Loc, const Expr *Hint) {
2247   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2248   // CriticalOpGen();
2249   // __kmpc_end_critical(ident_t *, gtid, Lock);
2250   // Prepare arguments and build a call to __kmpc_critical
2251   if (!CGF.HaveInsertPoint())
2252     return;
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2254                          getCriticalRegionLock(CriticalName)};
2255   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2256                                                 std::end(Args));
2257   if (Hint) {
2258     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2259         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2260   }
2261   CommonActionTy Action(
2262       OMPBuilder.getOrCreateRuntimeFunction(
2263           CGM.getModule(),
2264           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272 
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(iden_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324 
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340 
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344 
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358                         OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2360                         Args);
2361   TaskgroupOpGen.setAction(Action);
2362   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2363 }
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
/// Build the helper function the runtime invokes to broadcast copyprivate
/// values: `void copy_func(void *LHSArg, void *RHSArg)` where both arguments
/// are arrays of void* pointing at the variables; element I of LHS is
/// assigned from element I of RHS using AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // Internal linkage: the helper is only referenced from this TU's
  // __kmpc_copyprivate call.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one copy-assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy uses the variable's own type so user-defined copy assignment
    // operators (AssignmentOps[I]) are honored.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2432 
/// Emit code for a '#pragma omp single' region, including the optional
/// copyprivate broadcast of values from the executing thread to all others.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four copyprivate lists are parallel arrays: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Generated shape:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Tracks whether this thread executed the single region (and therefore
    // is the source of the copyprivate broadcast).
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the guarded block, so only the executing thread sets it.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  // Emitted on ALL threads (after the guard closes); the runtime uses did_it
  // to pick the broadcasting thread.
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs here bind to the callee's
    // (DestExprs, SrcExprs) parameters in that order — looks intentional but
    // the naming is inverted; verify against the clause's expression lists.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2520 
2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522                                         const RegionCodeGenTy &OrderedOpGen,
2523                                         SourceLocation Loc, bool IsThreads) {
2524   if (!CGF.HaveInsertPoint())
2525     return;
2526   // __kmpc_ordered(ident_t *, gtid);
2527   // OrderedOpGen();
2528   // __kmpc_end_ordered(ident_t *, gtid);
2529   // Prepare arguments and build a call to __kmpc_ordered
2530   if (IsThreads) {
2531     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_ordered),
2534                           Args,
2535                           OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537                           Args);
2538     OrderedOpGen.setAction(Action);
2539     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540     return;
2541   }
2542   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546   unsigned Flags;
2547   if (Kind == OMPD_for)
2548     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549   else if (Kind == OMPD_sections)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551   else if (Kind == OMPD_single)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553   else if (Kind == OMPD_barrier)
2554     Flags = OMP_IDENT_BARRIER_EXPL;
2555   else
2556     Flags = OMP_IDENT_BARRIER_IMPL;
2557   return Flags;
2558 }
2559 
2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2561     CodeGenFunction &CGF, const OMPLoopDirective &S,
2562     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563   // Check if the loop directive is actually a doacross loop directive. In this
2564   // case choose static, 1 schedule.
2565   if (llvm::any_of(
2566           S.getClausesOfKind<OMPOrderedClause>(),
2567           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568     ScheduleKind = OMPC_SCHEDULE_static;
2569     // Chunk size is 1 in this case.
2570     llvm::APInt ChunkSize(32, 1);
2571     ChunkExpr = IntegerLiteral::Create(
2572         CGF.getContext(), ChunkSize,
2573         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574         SourceLocation());
2575   }
2576 }
2577 
/// Emit an OpenMP barrier. In a cancellable region (unless \p ForceSimpleCall)
/// this uses __kmpc_cancel_barrier and, with \p EmitChecks, branches to the
/// construct's cancellation exit when the barrier reports a cancellation;
/// otherwise it is a plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The directive kind is encoded into the location's barrier flags.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: the barrier also reports pending cancellations.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Jump through cleanups to the construct's cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630                                           bool Chunked, bool Ordered) {
2631   switch (ScheduleKind) {
2632   case OMPC_SCHEDULE_static:
2633     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2635   case OMPC_SCHEDULE_dynamic:
2636     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637   case OMPC_SCHEDULE_guided:
2638     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639   case OMPC_SCHEDULE_runtime:
2640     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641   case OMPC_SCHEDULE_auto:
2642     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643   case OMPC_SCHEDULE_unknown:
2644     assert(!Chunked && "chunk was specified but schedule kind not known");
2645     return Ordered ? OMP_ord_static : OMP_sch_static;
2646   }
2647   llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653   // only static is allowed for dist_schedule
2654   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663 
2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669 
2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676 
2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689 
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
2730   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732           Schedule == OMP_sch_static_balanced_chunked ||
2733           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734           Schedule == OMP_dist_sch_static_chunked ||
2735           Schedule == OMP_dist_sch_static))
2736       Modifier = OMP_sch_modifier_nonmonotonic;
2737   }
2738   return Schedule | Modifier;
2739 }
2740 
// Emit the __kmpc_dispatch_init call that starts a dynamically scheduled
// (or ordered) worksharing loop. The runtime entry is selected by the
// induction-variable size/signedness via createDispatchInitFunction.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules must go through the static-init path, not
  // the dispatch interface.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2773 
/// Emit the actual call to the (already selected) __kmpc_for_static_init
/// runtime function. \p Schedule must be one of the static schedule kinds
/// (asserted below); \p M1 and \p M2 are the schedule clause modifiers that
/// get folded into the schedule-type argument.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops are handled through the dispatch interface instead.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked static kinds.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2822 
// Emit the static-init runtime call for a worksharing (loop or sections)
// directive: map the clause schedule to the runtime encoding, pick the
// __kmpc_for_static_init_* variant for the IV width, and delegate the actual
// call to emitForStaticInitCall.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the kind of worksharing construct being emitted.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2843 
2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845     CodeGenFunction &CGF, SourceLocation Loc,
2846     OpenMPDistScheduleClauseKind SchedKind,
2847     const CGOpenMPRuntime::StaticRTInput &Values) {
2848   OpenMPSchedType ScheduleNum =
2849       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850   llvm::Value *UpdatedLocation =
2851       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853   llvm::FunctionCallee StaticInitFunction;
2854   bool isGPUDistribute =
2855       CGM.getLangOpts().OpenMPIsDevice &&
2856       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857   StaticInitFunction = createForStaticInitFunction(
2858       Values.IVSize, Values.IVSigned, isGPUDistribute);
2859 
2860   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2861                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2862                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2863 }
2864 
// Emit the runtime call that finishes a statically scheduled worksharing
// region. GPU device code for distribute uses a dedicated runtime entry
// (__kmpc_distribute_static_fini); everything else uses
// __kmpc_for_static_fini.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  // Same AMDGCN/NVPTX device condition as in emitDistributeStaticInit.
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
2891 
// Notify the runtime that one iteration of an ordered dynamically scheduled
// loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // the concrete variant is selected by createDispatchFiniFunction from the
  // induction-variable size and signedness.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
2902 
// Emit a call to __kmpc_dispatch_next to fetch the next chunk of a
// dynamically scheduled loop. Returns an i1 value: true while there is work
// left (the runtime's kmp_int32 result converted to bool).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a signed 32-bit int; narrow it to bool for the caller.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2926 
2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2928                                            llvm::Value *NumThreads,
2929                                            SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938                       Args);
2939 }
2940 
2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2942                                          ProcBindKind ProcBind,
2943                                          SourceLocation Loc) {
2944   if (!CGF.HaveInsertPoint())
2945     return;
2946   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948   llvm::Value *Args[] = {
2949       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953                       Args);
2954 }
2955 
2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2957                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2958   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959     OMPBuilder.createFlush(CGF.Builder);
2960   } else {
2961     if (!CGF.HaveInsertPoint())
2962       return;
2963     // Build call void __kmpc_flush(ident_t *loc)
2964     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965                             CGM.getModule(), OMPRTL___kmpc_flush),
2966                         emitUpdateLocation(CGF, Loc));
2967   }
2968 }
2969 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): this order must stay in sync with the kmp_task_t record
/// layout built elsewhere in this file -- confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2995 
2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2997   return OffloadEntriesTargetRegion.empty() &&
2998          OffloadEntriesDeviceGlobalVar.empty();
2999 }
3000 
/// Initialize target region entry.
/// Creates a placeholder entry (no address/ID yet) at the given
/// device/file/parent/line coordinates; registerTargetRegionEntryInfo fills
/// it in later on the device side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3014 
// Register a target region entry: on the device it completes a previously
// initialized placeholder with its address/ID/flags; on the host it creates
// a brand-new entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: a plain target-region entry that already exists at these
    // coordinates (regardless of its address/ID) does not need registering
    // again.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3044 
3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3046     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3047     bool IgnoreAddressId) const {
3048   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3049   if (PerDevice == OffloadEntriesTargetRegion.end())
3050     return false;
3051   auto PerFile = PerDevice->second.find(FileID);
3052   if (PerFile == PerDevice->second.end())
3053     return false;
3054   auto PerParentName = PerFile->second.find(ParentName);
3055   if (PerParentName == PerFile->second.end())
3056     return false;
3057   auto PerLine = PerParentName->second.find(LineNum);
3058   if (PerLine == PerParentName->second.end())
3059     return false;
3060   // Fail if this entry is already registered.
3061   if (!IgnoreAddressId &&
3062       (PerLine->second.getAddress() || PerLine->second.getID()))
3063     return false;
3064   return true;
3065 }
3066 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  // Nesting order: device ID -> file ID -> parent name -> line number.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          // P.first() yields the StringMap key (the parent name).
          Action(D.first, F.first, P.first(), L.first, L.second);
}
3076 
// Initialize a device global variable entry with a placeholder (no address
// or size yet); registerDeviceGlobalVarEntryInfo completes it later.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3087 
3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3089     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3090                                      CharUnits VarSize,
3091                                      OMPTargetGlobalVarEntryKind Flags,
3092                                      llvm::GlobalValue::LinkageTypes Linkage) {
3093   if (CGM.getLangOpts().OpenMPIsDevice) {
3094     // This could happen if the device compilation is invoked standalone.
3095     if (!hasDeviceGlobalVarEntryInfo(VarName))
3096       return;
3097     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3098     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3099       if (Entry.getVarSize().isZero()) {
3100         Entry.setVarSize(VarSize);
3101         Entry.setLinkage(Linkage);
3102       }
3103       return;
3104     }
3105     Entry.setVarSize(VarSize);
3106     Entry.setLinkage(Linkage);
3107     Entry.setAddress(Addr);
3108   } else {
3109     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3110       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3111       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3112              "Entry not initialized!");
3113       if (Entry.getVarSize().isZero()) {
3114         Entry.setVarSize(VarSize);
3115         Entry.setLinkage(Linkage);
3116       }
3117       return;
3118     }
3119     OffloadEntriesDeviceGlobalVar.try_emplace(
3120         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3121     ++OffloadingEntriesNum;
3122   }
3123 }
3124 
3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3126     actOnDeviceGlobalVarEntriesInfo(
3127         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3128   // Scan all target region entries and perform the provided action.
3129   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3130     Action(E.getKey(), E.getValue());
3131 }
3132 
// Materialize one __tgt_offload_entry global for the offload entry table:
// an internal string holding the entry name plus a constant struct
// {addr, name, size, flags, reserved} placed in the omp_offloading_entries
// section for the linker/runtime to collect.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Struct fields: {addr, name, size, flags, reserved}.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3163 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order. Each tuple holds the entry, the source
  // location it was recovered at (may be invalid), and the parent
  // function/variable name used for diagnostics.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover the source location from the device/file unique IDs so
        // later diagnostics can point at the original target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual host-side offload entries and diagnose entries that were
  // initialized but never got a valid address/ID registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For a 'link' entry the entry address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3337 
/// Loads all the offload entries information from the host IR
/// metadata. The parsed entries are used to pre-initialize the offload entry
/// tables before device code generation starts.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout must
    // mirror what createOffloadEntriesAndInfoMetadata() emitted.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3406 
3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3408   if (!KmpRoutineEntryPtrTy) {
3409     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3410     ASTContext &C = CGM.getContext();
3411     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3412     FunctionProtoType::ExtProtoInfo EPI;
3413     KmpRoutineEntryPtrQTy = C.getPointerType(
3414         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3415     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3416   }
3417 }
3418 
3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3420   // Make sure the type of the entry is already created. This is the type we
3421   // have to create:
3422   // struct __tgt_offload_entry{
3423   //   void      *addr;       // Pointer to the offload entry info.
3424   //                          // (function or global)
3425   //   char      *name;       // Name of the function or global.
3426   //   size_t     size;       // Size of the entry info (0 if it a function).
3427   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3428   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3429   // };
3430   if (TgtOffloadEntryQTy.isNull()) {
3431     ASTContext &C = CGM.getContext();
3432     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3433     RD->startDefinition();
3434     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3435     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3436     addFieldToRecordDecl(C, RD, C.getSizeType());
3437     addFieldToRecordDecl(
3438         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     RD->completeDefinition();
3442     RD->addAttr(PackedAttr::CreateImplicit(C));
3443     TgtOffloadEntryQTy = C.getRecordType(RD);
3444   }
3445   return TgtOffloadEntryQTy;
3446 }
3447 
namespace {
/// Bundles an original captured variable with its task-private copy and, for
/// firstprivate variables, the element declaration used when emitting the
/// copy-initialization.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor for local (uncaptured) privates: only Original is set, which
  // is exactly what isLocalPrivate() keys on.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;        // Reference expr for the capture.
  const VarDecl *Original = nullptr;        // Original (shared) variable.
  const VarDecl *PrivateCopy = nullptr;     // Task-private copy, if any.
  const VarDecl *PrivateElemInit = nullptr; // Init element for firstprivate.
  /// True for a local private variable (constructed via the single-argument
  /// constructor above), i.e. one with no capture or private copy attached.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Pair of the required alignment and the private-variable descriptor.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3465 
3466 static bool isAllocatableDecl(const VarDecl *VD) {
3467   const VarDecl *CVD = VD->getCanonicalDecl();
3468   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3469     return false;
3470   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3471   // Use the default allocation.
3472   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3473             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param Kind Directive kind; taskloop directives get five extra arguments
/// (lb/ub/st/liter/reductions) forwarded to \p TaskFunction.
/// \param TaskFunction The outlined task body to invoke.
/// \param TaskPrivatesMap The privates-mapping function passed through to the
/// task body (may be a null pointer constant when there are no privates).
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two parameters: the global thread id and the task descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base = &tt->task_data (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load and cast the shareds pointer to the expected type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (second field of the wrapper) is optional; pass a null
  // pointer when it is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally forward lb, ub, st, liter and reductions.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always reports success (0) to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
/// Emit the task destructor thunk '.omp_task_destructor.': it takes the
/// global thread id and the task descriptor, and runs the destructors of all
/// fields in the descriptor's privates record that have non-trivial
/// destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same (gtid, task descriptor) signature as the task entry thunk.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of the wrapper.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destroy for every field with a non-trivial destruction kind;
  // the cleanups run when FinishFunction pops the scope.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,...,  <tyn> **noalias privn) {
3743 ///   *priv1 = &.privates.priv1;
3744 ///   ...;
3745 ///   *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
3748 static llvm::Value *
3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3750                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3751                                ArrayRef<PrivateDataTy> Privates) {
3752   ASTContext &C = CGM.getContext();
3753   FunctionArgList Args;
3754   ImplicitParamDecl TaskPrivatesArg(
3755       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3757       ImplicitParamDecl::Other);
3758   Args.push_back(&TaskPrivatesArg);
3759   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3760   unsigned Counter = 1;
3761   for (const Expr *E : Data.PrivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const Expr *E : Data.FirstprivateVars) {
3773     Args.push_back(ImplicitParamDecl::Create(
3774         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3775         C.getPointerType(C.getPointerType(E->getType()))
3776             .withConst()
3777             .withRestrict(),
3778         ImplicitParamDecl::Other));
3779     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3780     PrivateVarsPos[VD] = Counter;
3781     ++Counter;
3782   }
3783   for (const Expr *E : Data.LastprivateVars) {
3784     Args.push_back(ImplicitParamDecl::Create(
3785         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3786         C.getPointerType(C.getPointerType(E->getType()))
3787             .withConst()
3788             .withRestrict(),
3789         ImplicitParamDecl::Other));
3790     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3791     PrivateVarsPos[VD] = Counter;
3792     ++Counter;
3793   }
3794   for (const VarDecl *VD : Data.PrivateLocals) {
3795     QualType Ty = VD->getType().getNonReferenceType();
3796     if (VD->getType()->isLValueReferenceType())
3797       Ty = C.getPointerType(Ty);
3798     if (isAllocatableDecl(VD))
3799       Ty = C.getPointerType(Ty);
3800     Args.push_back(ImplicitParamDecl::Create(
3801         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3802         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3803         ImplicitParamDecl::Other));
3804     PrivateVarsPos[VD] = Counter;
3805     ++Counter;
3806   }
3807   const auto &TaskPrivatesMapFnInfo =
3808       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3809   llvm::FunctionType *TaskPrivatesMapTy =
3810       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3811   std::string Name =
3812       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3813   auto *TaskPrivatesMap = llvm::Function::Create(
3814       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3815       &CGM.getModule());
3816   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3817                                     TaskPrivatesMapFnInfo);
3818   if (CGM.getLangOpts().Optimize) {
3819     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3821     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3822   }
3823   CodeGenFunction CGF(CGM);
3824   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3825                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3826 
3827   // *privi = &.privates.privi;
3828   LValue Base = CGF.EmitLoadOfPointerLValue(
3829       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3830       TaskPrivatesArg.getType()->castAs<PointerType>());
3831   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3832   Counter = 0;
3833   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3834     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3835     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3836     LValue RefLVal =
3837         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3838     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3839         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3840     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3841     ++Counter;
3842   }
3843   CGF.FinishFunction();
3844   return TaskPrivatesMap;
3845 }
3846 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds area of the source task
/// (may be invalid when no firstprivate copy is needed).
/// \param TDBase Base lvalue of the destination kmp_task_t_with_privates.
/// \param ForDup true when emitting inside the task_dup function (taskloop
/// duplication); in that case only non-trivial constructor inits are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the wrapper record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record's fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial constructor inits are
    // emitted; trivial copies were already handled by memcpy of the task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: locate the source (shared) lvalue to copy from.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds of the source task,
          // re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit the reference
          // expression directly in the current context.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the init element to the shared
          // address and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: emit the default initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
3970 static bool checkInitIsRequired(CodeGenFunction &CGF,
3971                                 ArrayRef<PrivateDataTy> Privates) {
3972   bool InitRequired = false;
3973   for (const PrivateDataTy &Pair : Privates) {
3974     if (Pair.second.isLocalPrivate())
3975       continue;
3976     const VarDecl *VD = Pair.second.PrivateCopy;
3977     const Expr *Init = VD->getAnyInitializer();
3978     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979                                     !CGF.isTrivialInitializer(Init));
3980     if (InitRequired)
3981       break;
3982   }
3983   return InitRequired;
3984 }
3985 
3986 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true when the 'liter' field of the destination task
/// must be set from the 'lastpriv' parameter.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Parameters: destination task, source task, lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied out of the *source* task's shareds area.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070                          ArrayRef<PrivateDataTy> Privates) {
4071   for (const PrivateDataTy &P : Privates) {
4072     if (P.second.isLocalPrivate())
4073       continue;
4074     QualType Ty = P.second.Original->getType().getNonReferenceType();
4075     if (Ty.isDestructedType())
4076       return true;
4077   }
4078   return false;
4079 }
4080 
namespace {
/// Loop generator for OpenMP iterator expression.
/// The constructor privatizes the iterator variables and opens one loop nest
/// per declared iterator; the destructor, in reverse order, emits the
/// matching counter increments, back-branches and exit blocks. Any code
/// emitted while the scope is alive therefore lands in the innermost body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue ("cont") and exit jump destinations, indexed in
  // the same order as the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes the whole scope a no-op.
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Upper bounds are evaluated up front, before any loop is opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed/unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the generated loops, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163   llvm::Value *Addr;
4164   if (OASE) {
4165     const Expr *Base = OASE->getBase();
4166     Addr = CGF.EmitScalarExpr(Base);
4167   } else {
4168     Addr = CGF.EmitLValue(E).getPointer(CGF);
4169   }
4170   llvm::Value *SizeVal;
4171   QualType Ty = E->getType();
4172   if (OASE) {
4173     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174     for (const Expr *SE : OASE->getDimensions()) {
4175       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176       Sz = CGF.EmitScalarConversion(
4177           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179     }
4180   } else if (const auto *ASE =
4181                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182     LValue UpAddrLVal =
4183         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190   } else {
4191     SizeVal = CGF.getTypeSize(Ty);
4192   }
4193   return std::make_pair(Addr, SizeVal);
4194 }
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
/// Allocates and initializes a task descriptor for the task-generating
/// directive \p D: aggregates and sorts the private variables, builds the
/// kmp_task_t_with_privates record, emits the proxy task entry, calls the
/// runtime allocation entry point, copies shareds, initializes private
/// copies, and fills in the optional destructor thunk, priority, detach
/// event and affinity information.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the init expression's variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Stable sort by descending alignment (PrivateDataTy::first).
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives use a distinct (cached) record layout from plain
  // task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-mapping function is passed as the 4th parameter of the
  // outlined task function, so take its expected type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause expression) or a
  // compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target variant of the allocation entry is used;
  // it takes an additional device ID argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count;
    // plain clauses contribute their (compile-time) variable-list size.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: materialize a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-generated entries are counted at runtime, starting after the
      // statically filled ones.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope emits the surrounding iterator loops; stores below happen
      // inside the innermost loop body.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may also need a task-duplication helper for lastprivates or
    // privates requiring (re)initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4601 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these numeric values form part of the contract with the
/// OpenMP runtime (kmp_depend_info flags) — presumably mirroring
/// openmp/runtime/src/kmp.h; verify against the runtime before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record: { intptr_t base_addr; size_t len;
/// flags }.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615   RTLDependenceKindTy DepKind;
4616   switch (K) {
4617   case OMPC_DEPEND_in:
4618     DepKind = DepIn;
4619     break;
4620   // Out and InOut dependencies must use the same code.
4621   case OMPC_DEPEND_out:
4622   case OMPC_DEPEND_inout:
4623     DepKind = DepInOut;
4624     break;
4625   case OMPC_DEPEND_mutexinoutset:
4626     DepKind = DepMutexInOutSet;
4627     break;
4628   case OMPC_DEPEND_source:
4629   case OMPC_DEPEND_sink:
4630   case OMPC_DEPEND_depobj:
4631   case OMPC_DEPEND_unknown:
4632     llvm_unreachable("Unknown task dependence type");
4633   }
4634   return DepKind;
4635 }
4636 
4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4639                            QualType &FlagsTy) {
4640   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4641   if (KmpDependInfoTy.isNull()) {
4642     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4643     KmpDependInfoRD->startDefinition();
4644     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4647     KmpDependInfoRD->completeDefinition();
4648     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4649   }
4650 }
4651 
/// Returns {number of dependency elements, lvalue of the first element} for
/// the dependency array referenced by the depobj lvalue \p DepobjLVal.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable stores a void* that points at the first
  // kmp_depend_info element of the array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element: the slot at index -1 holds bookkeeping data, and
  // its base_addr field stores the number of dependency elements.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4680 
/// Fill kmp_depend_info array entries for the dependencies of one 'depend'
/// clause.
///
/// \param Pos Either a compile-time index (unsigned*), used when the number
///        of dependencies is statically known, or a runtime counter lvalue
///        (LValue*), used when an iterator modifier makes the count dynamic.
///        The counter is advanced past every entry that is written.
/// \param Data Dependence kind, optional iterator expression, and the list of
///        dependence expressions of the clause.
/// \param DependenciesArray Destination array of kmp_depend_info records.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an 'iterator(...)' modifier, emit the surrounding loop
  // nest; the per-expression code below is then emitted inside that nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Address the destination slot either by constant index or by the
    // runtime counter, depending on which form of Pos was supplied.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter: at compile time for the static form, or
    // by emitting an increment of the in-memory counter for the dynamic form.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4740 
/// For each expression of a depobj-kind 'depend' clause, emit code that reads
/// the number of kmp_depend_info elements stored in that depobj (from the
/// hidden element preceding the stored pointer) and return the loaded counts,
/// one llvm::Value per clause expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit the iterator loop nest, if any; the body below runs per iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer held by the depobj and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to the hidden entry carrying the count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Keep the count in a stack temporary so it can be read after the
      // iterator scope is closed.
      // NOTE(review): the zero-store below is emitted inside the iterator
      // loop body, so with an iterator modifier each runtime iteration resets
      // the temporary before adding — only the last iteration's count would
      // survive. Confirm this is the intended behavior for
      // 'depend(iterator(...), depobj : ...)'.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4798 
/// Copy the kmp_depend_info entries of every depobj in a depobj-kind 'depend'
/// clause into \p DependenciesArray with memcpy, starting at the runtime
/// position held in \p PosLVal and advancing that position by each depobj's
/// element count.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, used to scale the memcpy length.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loop nest, if any; the body below runs per iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer held by the depobj and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps * sizeof(kmp_depend_info) bytes
      // into the destination array at the current position.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4860 
/// Build the combined kmp_depend_info array for all 'depend' clauses of a
/// task-generating construct.
///
/// Statically-sized dependencies are counted at compile time; depobj and
/// iterator-modified dependencies contribute runtime counts, in which case
/// the array is emitted as a VLA. Returns the total number of elements (as
/// i32) and the array address cast to void*; returns {nullptr, invalid} when
/// there are no dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only non-depobj clauses without iterator modifiers.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators (both are only known at runtime).
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator multiplies the per-clause expression count by its
      // (runtime) upper bound.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: total = static count + depobj elements + iterator
    // expansions; allocate a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a VariableArrayType.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a constant-sized stack array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Fill the statically-indexed entries first (no depobj, no iterators).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators, tracking the position in a
  // runtime counter seeded with the static count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4984 
/// Emit the allocation and initialization of a depobj's dependence storage
/// ('omp depobj(x) depend(...)').
///
/// Heap-allocates (via __kmpc_alloc, default allocator) a kmp_depend_info
/// array with one extra leading element, stores the number of real elements
/// into that element's base_addr field, fills the dependence entries, and
/// returns the address of the first real element — the value the depobj
/// variable holds (see getDepobjElements for the reader side).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: element count is the runtime product of the
    // iterators' upper bounds, times one entry per clause expression.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the hidden leading element; scale by the record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size of a constant array with the extra leading element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the entries starting at index 1 (index 0 is the count element);
  // with an iterator modifier the index must be a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer to the first real element, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5067 
/// Emit code for 'omp depobj(x) destroy': free the depobj's dependence
/// storage with __kmpc_free.
///
/// The depobj variable points one element past the start of the actual
/// allocation (the hidden count element comes first), so the pointer is
/// stepped back by one kmp_depend_info element before being freed.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the pointer stored in the depobj and view it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the true allocation start (the hidden count element).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5094 
/// Emit code for 'omp depobj(x) update(kind)': iterate over all
/// kmp_depend_info elements stored in the depobj and overwrite each element's
/// flags field with the new dependence kind. The other fields (base_addr,
/// len) are left untouched.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover element count and array start from the depobj.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // Walk the elements with a do/while-shaped loop: the entry block falls
  // through into the body, which updates one element and then branches back
  // until the advanced pointer reaches End.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: Begin on entry, ElementNext on the
  // back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5141 
/// Emit a task-generating construct ('#pragma omp task' and friends).
///
/// Initializes the task object via emitTaskInit, builds the dependence array
/// (if any), then either enqueues the task (__kmpc_omp_task /
/// __kmpc_omp_task_with_deps) or — when \p IfCond evaluates false — executes
/// it immediately as an undeferred task, bracketed by
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 and preceded by
/// __kmpc_omp_wait_deps when dependences are present.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' path: enqueue the task through the runtime (deferred execution).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id reset to 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' path: if(false) — run the task body immediately on this thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5259 
/// Emit a '#pragma omp taskloop' (and combined variants) call: initialize the
/// task object, store the loop lower/upper bounds, stride, and reduction data
/// into the kmp_task_t, then invoke __kmpc_taskloop. Unlike emitTaskCall, the
/// 'if' clause is lowered to an integer argument of __kmpc_taskloop rather
/// than a branch.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb, ub, and st fields of the task record from the loop
  // directive's helper variables' initializers.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds accepted by __kmpc_taskloop's 'sched' parameter.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: num_tasks(...) / grainsize(...) / absent; the clause value is
      // carried in Data.Schedule (pointer = expression, int = which clause).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback, if last-private/firstprivate copying requires one.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5345 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Emits a while-do loop over the elements: privatizes \p LHSVar and
/// \p RHSVar to the current element pair and invokes \p RedOpGen for each.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by the atomic-reduction generator; all null otherwise).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB must be captured before EmitBlock so the PHIs below can name it
  // as the incoming predecessor for the initial element pointers.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy. Privatize both variables to the current element so the
  // generated reduction operation works on the per-element addresses.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge predecessor is the *current* insert block, not BodyBB:
  // RedOpGen may have emitted additional basic blocks inside the loop.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5428 
5429 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5430 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5431 /// UDR combiner function.
5432 static void emitReductionCombiner(CodeGenFunction &CGF,
5433                                   const Expr *ReductionOp) {
5434   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5435     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5436       if (const auto *DRE =
5437               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5438         if (const auto *DRD =
5439                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5440           std::pair<llvm::Function *, llvm::Function *> Reduction =
5441               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5442           RValue Func = RValue::get(Reduction.first);
5443           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5444           CGF.EmitIgnoredExpr(ReductionOp);
5445           return;
5446         }
5447   CGF.EmitIgnoredExpr(ReductionOp);
5448 }
5449 
/// Emits the outlined reduce_func used by __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of <n> void* pointers to the reduction items;
/// the body applies each combiner in \p ReductionOps element-wise, storing
/// the result through the LHS pointers.
/// \param ArgsType Pointer-to-array type both void* arguments are cast to.
/// \param Privates Private copies of the reduction items (give the types).
/// \param LHSExprs, RHSExprs DeclRefExprs remapped onto the two arrays.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the two arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. VLA items occupy an extra array
      // slot holding the element count, hence the additional ++Idx.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5541 
5542 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5543                                                   const Expr *ReductionOp,
5544                                                   const Expr *PrivateRef,
5545                                                   const DeclRefExpr *LHS,
5546                                                   const DeclRefExpr *RHS) {
5547   if (PrivateRef->getType()->isArrayType()) {
5548     // Emit reduction for array section.
5549     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5550     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5551     EmitOMPAggregateReduction(
5552         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5553         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5554           emitReductionCombiner(CGF, ReductionOp);
5555         });
5556   } else {
5557     // Emit reduction for array subscript or single variable.
5558     emitReductionCombiner(CGF, ReductionOp);
5559   }
5560 }
5561 
/// Emits code for an OpenMP reduction clause: packs pointers to the private
/// copies into RedList, calls __kmpc_reduce{_nowait}, and emits the tree
/// (case 1) and atomic/fallback-critical (case 2) combine paths.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: apply each combiner directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. VLAs use an extra slot that carries the element
      // count (as a void*-encoded integer) right after the data pointer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the inner 'BO' deliberately shadows the outer one; the outer
      // BinaryOperatorKind is filled in from the RHS operator below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // 'x = x op e' pattern recognized: emit as a simple atomic update.
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Re-evaluate the update expression with the LHS temporarily
                // bound to the value loaded by the atomic sequence.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5868 
5869 /// Generates unique name for artificial threadprivate variables.
5870 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5871 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5872                                       const Expr *Ref) {
5873   SmallString<256> Buffer;
5874   llvm::raw_svector_ostream Out(Buffer);
5875   const clang::DeclRefExpr *DE;
5876   const VarDecl *D = ::getBaseDecl(Ref, DE);
5877   if (!D)
5878     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5879   D = D->getCanonicalDecl();
5880   std::string Name = CGM.getOpenMPRuntime().getName(
5881       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5882   Out << Prefix << Name << "_"
5883       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5884   return std::string(Out.str());
5885 }
5886 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to be initialized.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Original item is unused by the initializer: pass a null placeholder.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5955 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Combiner expression; \p LHS / \p RHS are the variables
/// it references, remapped below onto the two function arguments.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6033 
6034 /// Emits reduction finalizer function:
6035 /// \code
6036 /// void @.red_fini(void* %arg) {
6037 /// %0 = bitcast void* %arg to <type>*
6038 /// <destroy>(<type>* %0)
6039 /// ret void
6040 /// }
6041 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed if the N-th reduction item requires no cleanups;
  // the caller stores a null pointer in reduce_fini instead.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Build the signature void(void *): the single argument is the private
  // copy of the reduction item to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (the runtime cannot pass VLA/array-section sizes
  // to this function directly; see emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6082 
/// Emits initialization code for task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each holding the
/// shared/orig addresses, size, and init/fini/comb routine pointers) and
/// passes it to __kmpc_taskred_modifier_init (when a task reduction modifier
/// is present) or __kmpc_taskred_init. Returns the runtime's taskgroup
/// descriptor, or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when no cleanups are needed, see
    // emitReduceFiniFunction).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (or 1 to request delayed creation for
    // VLAs/array sections, see DelayedCreation above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6211 
/// Emits finalization for a task reduction with a task modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6229 
/// Stores the (runtime-computed) size of the N-th reduction item into an
/// artificial threadprivate variable so that the generated
/// init/combiner/finalizer functions can reload it (the runtime provides no
/// way to pass VLA/array-section sizes to those functions).
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable only if the item's size is
  // non-constant, i.e. Sizes.second is non-null; for constant-sized items
  // nothing needs to be stored.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6246 
6247 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6248                                               SourceLocation Loc,
6249                                               llvm::Value *ReductionsPtr,
6250                                               LValue SharedLVal) {
6251   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6252   // *d);
6253   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6254                                                    CGM.IntTy,
6255                                                    /*isSigned=*/true),
6256                          ReductionsPtr,
6257                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6258                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6259   return Address(
6260       CGF.EmitRuntimeCall(
6261           OMPBuilder.getOrCreateRuntimeFunction(
6262               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6263           Args),
6264       SharedLVal.getAlignment());
6265 }
6266 
/// Emits a taskwait, either through the OpenMPIRBuilder (when enabled and
/// there are no dependences) or via a direct KMP runtime call:
/// __kmpc_omp_wait_deps when dependences are present, __kmpc_omp_taskwait
/// otherwise.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Materialize the dependence array for the wait call (invalid when there
    // are no dependences).
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependence info: ndeps_noalias = 0, noalias_dep_list =
      // null.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an OpenMP region, also emit the untied-task switch point so an
  // untied task can be resumed here.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6317 
6318 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6319                                            OpenMPDirectiveKind InnerKind,
6320                                            const RegionCodeGenTy &CodeGen,
6321                                            bool HasCancel) {
6322   if (!CGF.HaveInsertPoint())
6323     return;
6324   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6325                                  InnerKind != OMPD_critical &&
6326                                      InnerKind != OMPD_master &&
6327                                      InnerKind != OMPD_masked);
6328   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6329 }
6330 
namespace {
/// Cancellation kinds as expected by the __kmpc_cancel /
/// __kmpc_cancellationpoint runtime entry points; the numeric values are
/// passed directly to the runtime and must stay in sync with it.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6340 
6341 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6342   RTCancelKind CancelKind = CancelNoreq;
6343   if (CancelRegion == OMPD_parallel)
6344     CancelKind = CancelParallel;
6345   else if (CancelRegion == OMPD_for)
6346     CancelKind = CancelLoop;
6347   else if (CancelRegion == OMPD_sections)
6348     CancelKind = CancelSections;
6349   else {
6350     assert(CancelRegion == OMPD_taskgroup);
6351     CancelKind = CancelTaskgroup;
6352   }
6353   return CancelKind;
6354 }
6355 
/// Emits a 'cancellation point' construct: calls __kmpc_cancellationpoint and,
/// if it signals cancellation, branches out of the enclosing construct
/// (after a cancel barrier for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6395 
/// Emits a 'cancel' construct: calls __kmpc_cancel (guarded by \p IfCond when
/// present) and, if the runtime signals cancellation, branches out of the
/// enclosing construct (after a cancel barrier for parallel regions).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel emission, shared by the guarded and unguarded paths.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Emit 'if (IfCond) cancel;' with an empty else branch.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6441 
6442 namespace {
6443 /// Cleanup action for uses_allocators support.
6444 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6445   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6446 
6447 public:
6448   OMPUsesAllocatorsActionTy(
6449       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6450       : Allocators(Allocators) {}
6451   void Enter(CodeGenFunction &CGF) override {
6452     if (!CGF.HaveInsertPoint())
6453       return;
6454     for (const auto &AllocatorData : Allocators) {
6455       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6456           CGF, AllocatorData.first, AllocatorData.second);
6457     }
6458   }
6459   void Exit(CodeGenFunction &CGF) override {
6460     if (!CGF.HaveInsertPoint())
6461       return;
6462     for (const auto &AllocatorData : Allocators) {
6463       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6464                                                         AllocatorData.first);
6465     }
6466   }
6467 };
6468 } // namespace
6469 
6470 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6471     const OMPExecutableDirective &D, StringRef ParentName,
6472     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6473     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6474   assert(!ParentName.empty() && "Invalid target region parent name!");
6475   HasEmittedTargetRegion = true;
6476   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6477   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6478     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6479       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6480       if (!D.AllocatorTraits)
6481         continue;
6482       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6483     }
6484   }
6485   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6486   CodeGen.setAction(UsesAllocatorAction);
6487   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6488                                    IsOffloadEntry, CodeGen);
6489 }
6490 
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the extent of the constant array of traits.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // View the traits array address as void** and load it as the void* the
  // runtime expects.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. The allocator variable itself is declared here as
  // well, before its first use.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6525 
6526 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6527                                              const Expr *Allocator) {
6528   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6529   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6530   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6531   llvm::Value *AllocatorVal =
6532       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6533   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6534                                           CGF.getContext().VoidPtrTy,
6535                                           Allocator->getExprLoc());
6536   (void)CGF.EmitRuntimeCall(
6537       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6538                                             OMPRTL___kmpc_destroy_allocator),
6539       {ThreadId, AllocatorVal});
6540 }
6541 
/// Generates the outlined function for a target region, registers it as an
/// offload entry when requested, and attaches num_teams/thread_limit
/// attributes when they can be determined.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target body into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a unique global byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6624 
6625 /// Checks if the expression is constant or does not have non-trivial function
6626 /// calls.
6627 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6628   // We can skip constant expressions.
6629   // We can skip expressions with trivial calls or simple expressions.
6630   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6631           !E->hasNonTrivialCall(Ctx)) &&
6632          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6633 }
6634 
/// Peels compound statements and ignorable statements off \p Body and returns
/// the single "meaningful" child statement if there is exactly one, or
/// nullptr if the body contains several meaningful statements.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the remaining candidate is itself a compound
  // statement wrapping (at most) one meaningful child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant, no non-trivial calls, no side
      // effects) do not count as meaningful children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable when every declaration in it is either a
        // non-variable declaration or an unused/global variable.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6676 
6677 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6678     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6679     int32_t &DefaultVal) {
6680 
6681   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6682   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6683          "Expected target-based executable directive.");
6684   switch (DirectiveKind) {
6685   case OMPD_target: {
6686     const auto *CS = D.getInnermostCapturedStmt();
6687     const auto *Body =
6688         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6689     const Stmt *ChildStmt =
6690         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6691     if (const auto *NestedDir =
6692             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6693       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6694         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6695           const Expr *NumTeams =
6696               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6697           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6698             if (auto Constant =
6699                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6700               DefaultVal = Constant->getExtValue();
6701           return NumTeams;
6702         }
6703         DefaultVal = 0;
6704         return nullptr;
6705       }
6706       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6707           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6708         DefaultVal = 1;
6709         return nullptr;
6710       }
6711       DefaultVal = 1;
6712       return nullptr;
6713     }
6714     // A value of -1 is used to check if we need to emit no teams region
6715     DefaultVal = -1;
6716     return nullptr;
6717   }
6718   case OMPD_target_teams:
6719   case OMPD_target_teams_distribute:
6720   case OMPD_target_teams_distribute_simd:
6721   case OMPD_target_teams_distribute_parallel_for:
6722   case OMPD_target_teams_distribute_parallel_for_simd: {
6723     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6724       const Expr *NumTeams =
6725           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6726       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6727         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6728           DefaultVal = Constant->getExtValue();
6729       return NumTeams;
6730     }
6731     DefaultVal = 0;
6732     return nullptr;
6733   }
6734   case OMPD_target_parallel:
6735   case OMPD_target_parallel_for:
6736   case OMPD_target_parallel_for_simd:
6737   case OMPD_target_simd:
6738     DefaultVal = 1;
6739     return nullptr;
6740   case OMPD_parallel:
6741   case OMPD_for:
6742   case OMPD_parallel_for:
6743   case OMPD_parallel_master:
6744   case OMPD_parallel_sections:
6745   case OMPD_for_simd:
6746   case OMPD_parallel_for_simd:
6747   case OMPD_cancel:
6748   case OMPD_cancellation_point:
6749   case OMPD_ordered:
6750   case OMPD_threadprivate:
6751   case OMPD_allocate:
6752   case OMPD_task:
6753   case OMPD_simd:
6754   case OMPD_tile:
6755   case OMPD_unroll:
6756   case OMPD_sections:
6757   case OMPD_section:
6758   case OMPD_single:
6759   case OMPD_master:
6760   case OMPD_critical:
6761   case OMPD_taskyield:
6762   case OMPD_barrier:
6763   case OMPD_taskwait:
6764   case OMPD_taskgroup:
6765   case OMPD_atomic:
6766   case OMPD_flush:
6767   case OMPD_depobj:
6768   case OMPD_scan:
6769   case OMPD_teams:
6770   case OMPD_target_data:
6771   case OMPD_target_exit_data:
6772   case OMPD_target_enter_data:
6773   case OMPD_distribute:
6774   case OMPD_distribute_simd:
6775   case OMPD_distribute_parallel_for:
6776   case OMPD_distribute_parallel_for_simd:
6777   case OMPD_teams_distribute:
6778   case OMPD_teams_distribute_simd:
6779   case OMPD_teams_distribute_parallel_for:
6780   case OMPD_teams_distribute_parallel_for_simd:
6781   case OMPD_target_update:
6782   case OMPD_declare_simd:
6783   case OMPD_declare_variant:
6784   case OMPD_begin_declare_variant:
6785   case OMPD_end_declare_variant:
6786   case OMPD_declare_target:
6787   case OMPD_end_declare_target:
6788   case OMPD_declare_reduction:
6789   case OMPD_declare_mapper:
6790   case OMPD_taskloop:
6791   case OMPD_taskloop_simd:
6792   case OMPD_master_taskloop:
6793   case OMPD_master_taskloop_simd:
6794   case OMPD_parallel_master_taskloop:
6795   case OMPD_parallel_master_taskloop_simd:
6796   case OMPD_requires:
6797   case OMPD_metadirective:
6798   case OMPD_unknown:
6799     break;
6800   default:
6801     break;
6802   }
6803   llvm_unreachable("Unexpected directive kind.");
6804 }
6805 
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  // Emits the host-side i32 value for the number of teams of a target-based
  // directive, or nullptr when no teams region has to be emitted at all
  // (DefaultNT stays -1 and no num_teams expression exists).
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams clause lives on a teams directive nested inside the
      // target region, so its expression must be emitted in the context of
      // the inner captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // The clause is on the directive itself; emit it in a cleanup scope so
      // temporaries created by the expression are destroyed here.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      // Unreachable: a non-null expression is only returned for the cases
      // handled above.
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region needs to be emitted for this directive.
    return nullptr;
  }

  // Statically known constant, or 0 meaning "use the runtime default".
  return Bld.getInt32(DefaultNT);
}
6847 
/// Computes the number of threads for a 'parallel' region nested directly
/// inside the captured statement \p CS, clamped by \p DefaultThreadLimitVal
/// when that is non-null. Returns an i32 value when the count can be
/// determined, i32 1 for simd regions, \p DefaultThreadLimitVal (possibly
/// null) when no parallel region is found, or i32 0 meaning "runtime
/// default" when the limit is also null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          // Only a clause with no name modifier or the 'parallel' modifier
          // applies to the nested parallel region.
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any helper variables captured for the condition before
            // evaluating it.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit helper variables captured for the num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the enclosing thread_limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads: fall back to the limit, or 0 (runtime default).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // Simd regions execute with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6939 
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  // Returns the expression limiting the number of threads for a target-based
  // directive (thread_limit, or num_threads if that gives a tighter constant
  // bound), if any, and updates \p DefaultVal with the statically-known
  // limit when it can be deduced.
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // Prefer a constant num_threads if it is tighter than the current
          // limit.
          // NOTE(review): if no thread_limit clause is present, DefaultVal
          // still holds the caller's initial value (-1 in the visible
          // caller at the top of this file), so a sole num_threads constant
          // can never satisfy `< DefaultVal` here — confirm this is the
          // intended behavior.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions run with a single thread.
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7061 
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  // Emits the host-side i32 value for the number of threads of a
  // target-based directive, combining thread_limit/num_threads clauses found
  // on the directive itself or on directives nested inside it.
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look inside the target region for a parallel region, or for nested
    // teams/distribute directives carrying the relevant clauses.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit helper variables captured for the thread_limit expression
        // before evaluating it.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested non-distribute teams directive, descend one more level
      // to find the directive that actually spawns threads.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // Simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // i32 0 tells the runtime to use its default number of threads.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // No direct parallel region: check a nested 'distribute' directive.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        // Only a clause with no name modifier or the 'parallel' modifier
        // applies here.
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) (unsigned).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions run with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7280 
7281 namespace {
7282 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7283 
7284 // Utility to handle information from clauses associated with a given
7285 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7286 // It provides a convenient interface to obtain the information and generate
7287 // code for that information.
7288 class MappableExprsHandler {
7289 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region.  Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured.  It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. See getFlagMemberOffset() for the shift amount used to
    /// encode a member index into this field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Allow bitwise operators (|, &, ...) on values of this enum.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7345 
7346   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7347   static unsigned getFlagMemberOffset() {
7348     unsigned Offset = 0;
7349     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7350          Remain = Remain >> 1)
7351       Offset++;
7352     return Offset;
7353   }
7354 
7355   /// Class that holds debugging information for a data mapping to be passed to
7356   /// the runtime library.
7357   class MappingExprInfo {
7358     /// The variable declaration used for the data mapping.
7359     const ValueDecl *MapDecl = nullptr;
7360     /// The original expression used in the map clause, or null if there is
7361     /// none.
7362     const Expr *MapExpr = nullptr;
7363 
7364   public:
7365     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7366         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7367 
7368     const ValueDecl *getMapDecl() const { return MapDecl; }
7369     const Expr *getMapExpr() const { return MapExpr; }
7370   };
7371 
7372   /// Class that associates information with a base pointer to be passed to the
7373   /// runtime library.
7374   class BasePointerInfo {
7375     /// The base pointer.
7376     llvm::Value *Ptr = nullptr;
7377     /// The base declaration that refers to this device pointer, or null if
7378     /// there is none.
7379     const ValueDecl *DevPtrDecl = nullptr;
7380 
7381   public:
7382     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7383         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7384     llvm::Value *operator*() const { return Ptr; }
7385     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7386     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7387   };
7388 
  // Convenience aliases for the per-entry arrays of mapping information that
  // are collected and later passed to the offloading runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7396 
7397   /// This structure contains combined information generated for mappable
7398   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7399   /// mappers, and non-contiguous information.
7400   struct MapCombinedInfoTy {
7401     struct StructNonContiguousInfo {
7402       bool IsNonContiguous = false;
7403       MapDimArrayTy Dims;
7404       MapNonContiguousArrayTy Offsets;
7405       MapNonContiguousArrayTy Counts;
7406       MapNonContiguousArrayTy Strides;
7407     };
7408     MapExprsArrayTy Exprs;
7409     MapBaseValuesArrayTy BasePointers;
7410     MapValuesArrayTy Pointers;
7411     MapValuesArrayTy Sizes;
7412     MapFlagsArrayTy Types;
7413     MapMappersArrayTy Mappers;
7414     StructNonContiguousInfo NonContigInfo;
7415 
7416     /// Append arrays in \a CurInfo.
7417     void append(MapCombinedInfoTy &CurInfo) {
7418       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7419       BasePointers.append(CurInfo.BasePointers.begin(),
7420                           CurInfo.BasePointers.end());
7421       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7422       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7423       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7424       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7425       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7426                                  CurInfo.NonContigInfo.Dims.end());
7427       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7428                                     CurInfo.NonContigInfo.Offsets.end());
7429       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7430                                    CurInfo.NonContigInfo.Counts.end());
7431       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7432                                     CurInfo.NonContigInfo.Strides.end());
7433     }
7434   };
7435 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Mapping info gathered before the struct's combined entry is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest element mapped so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest element mapped so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct itself.
    Address Base = Address::invalid();
    // Lower-bound address for the combined entry — presumably the start of
    // the mapped region; confirm against generateInfoForCapture's uses.
    Address LB = Address::invalid();
    // Set when the lowest mapped element is an array section.
    bool IsArraySection = false;
    // Set when the entire record has been mapped (no partial range needed).
    bool HasCompleteRecord = false;
  };
7451 
7452 private:
  /// A single map/motion-clause entry: the mappable-expression component
  /// list together with the map type, modifiers, and related attributes
  /// needed to generate its runtime mapping.
  struct MapInfo {
    // Components of the mappable expression, from base to final element.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the device address of this entry must be returned (set for
    // use_device_ptr/use_device_addr handling).
    bool ReturnDevicePointer = false;
    // Whether the map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;
    // User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    // Distinguishes use_device_addr entries from use_device_ptr ones.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7479 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression through which the device pointer/address is referenced.
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True when the entry comes from use_device_addr (as opposed to
    // use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7492 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7516 
  /// Compute the size in bytes of the object denoted by \p E as an
  /// llvm::Value of size_t type. Array shaping expressions and array
  /// sections need special handling because their mapped size depends on
  /// the dimensions/bounds rather than on the static expression type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * dim0 * dim1 * ...
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Convert each dimension value to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size: the pointee type for pointer bases, otherwise the
      // array element type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Select 0 when lb*elemsize exceeds the base size so the unsigned
      // subtraction cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7591 
7592   /// Return the corresponding bits for a given map clause modifier. Add
7593   /// a flag marking the map as a pointer if requested. Add a flag marking the
7594   /// map as the first one of a series of maps that relate to the same map
7595   /// expression.
7596   OpenMPOffloadMappingFlags getMapTypeBits(
7597       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7598       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7599       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7600     OpenMPOffloadMappingFlags Bits =
7601         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7602     switch (MapType) {
7603     case OMPC_MAP_alloc:
7604     case OMPC_MAP_release:
7605       // alloc and release is the default behavior in the runtime library,  i.e.
7606       // if we don't pass any bits alloc/release that is what the runtime is
7607       // going to do. Therefore, we don't need to signal anything for these two
7608       // type modifiers.
7609       break;
7610     case OMPC_MAP_to:
7611       Bits |= OMP_MAP_TO;
7612       break;
7613     case OMPC_MAP_from:
7614       Bits |= OMP_MAP_FROM;
7615       break;
7616     case OMPC_MAP_tofrom:
7617       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7618       break;
7619     case OMPC_MAP_delete:
7620       Bits |= OMP_MAP_DELETE;
7621       break;
7622     case OMPC_MAP_unknown:
7623       llvm_unreachable("Unexpected map type!");
7624     }
7625     if (AddPtrFlag)
7626       Bits |= OMP_MAP_PTR_AND_OBJ;
7627     if (AddIsTargetParamFlag)
7628       Bits |= OMP_MAP_TARGET_PARAM;
7629     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7630       Bits |= OMP_MAP_ALWAYS;
7631     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7632       Bits |= OMP_MAP_CLOSE;
7633     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7634         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7635       Bits |= OMP_MAP_PRESENT;
7636     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7637       Bits |= OMP_MAP_OMPX_HOLD;
7638     if (IsNonContiguous)
7639       Bits |= OMP_MAP_NON_CONTIG;
7640     return Bits;
7641   }
7642 
7643   /// Return true if the provided expression is a final array section. A
7644   /// final array section, is one whose length can't be proved to be one.
7645   bool isFinalArraySectionExpression(const Expr *E) const {
7646     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7647 
7648     // It is not an array section and therefore not a unity-size one.
7649     if (!OASE)
7650       return false;
7651 
7652     // An array section with no colon always refer to a single element.
7653     if (OASE->getColonLocFirst().isInvalid())
7654       return false;
7655 
7656     const Expr *Length = OASE->getLength();
7657 
7658     // If we don't have a length we have to check if the array has size 1
7659     // for this dimension. Also, we should always expect a length if the
7660     // base type is pointer.
7661     if (!Length) {
7662       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7663                              OASE->getBase()->IgnoreParenImpCasts())
7664                              .getCanonicalType();
7665       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7666         return ATy->getSize().getSExtValue() != 1;
7667       // If we don't have a constant dimension length, we have to consider
7668       // the current section as having any size, so it is not necessarily
7669       // unitary. If it happen to be unity size, that's user fault.
7670       return true;
7671     }
7672 
7673     // Check if the length evaluates to 1.
7674     Expr::EvalResult Result;
7675     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7676       return true; // Can have more that size 1.
7677 
7678     llvm::APSInt ConstLength = Result.Val.getInt();
7679     return ConstLength.getSExtValue() != 1;
7680   }
7681 
7682   /// Generate the base pointers, section pointers, sizes, map type bits, and
7683   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7684   /// map type, map or motion modifiers, and expression components.
7685   /// \a IsFirstComponent should be set to true if the provided set of
7686   /// components is the first associated with a capture.
7687   void generateInfoForComponentList(
7688       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7689       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7690       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7691       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7692       bool IsFirstComponentList, bool IsImplicit,
7693       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7694       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7695       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7696           OverlappedElements = llvm::None) const {
7697     // The following summarizes what has to be generated for each map and the
7698     // types below. The generated information is expressed in this order:
7699     // base pointer, section pointer, size, flags
7700     // (to add to the ones that come from the map type and modifier).
7701     //
7702     // double d;
7703     // int i[100];
7704     // float *p;
7705     //
7706     // struct S1 {
7707     //   int i;
7708     //   float f[50];
7709     // }
7710     // struct S2 {
7711     //   int i;
7712     //   float f[50];
7713     //   S1 s;
7714     //   double *p;
7715     //   struct S2 *ps;
7716     //   int &ref;
7717     // }
7718     // S2 s;
7719     // S2 *ps;
7720     //
7721     // map(d)
7722     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7723     //
7724     // map(i)
7725     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7726     //
7727     // map(i[1:23])
7728     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7729     //
7730     // map(p)
7731     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7732     //
7733     // map(p[1:24])
7734     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7735     // in unified shared memory mode or for local pointers
7736     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7737     //
7738     // map(s)
7739     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7740     //
7741     // map(s.i)
7742     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7743     //
7744     // map(s.s.f)
7745     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7746     //
7747     // map(s.p)
7748     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7749     //
7750     // map(to: s.p[:22])
7751     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7752     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7753     // &(s.p), &(s.p[0]), 22*sizeof(double),
7754     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7755     // (*) alloc space for struct members, only this is a target parameter
7756     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7757     //      optimizes this entry out, same in the examples below)
7758     // (***) map the pointee (map: to)
7759     //
7760     // map(to: s.ref)
7761     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7762     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7763     // (*) alloc space for struct members, only this is a target parameter
7764     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7765     //      optimizes this entry out, same in the examples below)
7766     // (***) map the pointee (map: to)
7767     //
7768     // map(s.ps)
7769     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7770     //
7771     // map(from: s.ps->s.i)
7772     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7773     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7774     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7775     //
7776     // map(to: s.ps->ps)
7777     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7778     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7779     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7780     //
7781     // map(s.ps->ps->ps)
7782     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7783     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7784     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7785     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7786     //
7787     // map(to: s.ps->ps->s.f[:22])
7788     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7789     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7790     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7791     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7792     //
7793     // map(ps)
7794     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7795     //
7796     // map(ps->i)
7797     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7798     //
7799     // map(ps->s.f)
7800     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7801     //
7802     // map(from: ps->p)
7803     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7804     //
7805     // map(to: ps->p[:22])
7806     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7807     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7808     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7809     //
7810     // map(ps->ps)
7811     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7812     //
7813     // map(from: ps->ps->s.i)
7814     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7815     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7816     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7817     //
7818     // map(from: ps->ps->ps)
7819     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7820     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7821     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7822     //
7823     // map(ps->ps->ps->ps)
7824     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7825     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7826     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7827     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7828     //
7829     // map(to: ps->ps->ps->s.f[:22])
7830     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7831     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7832     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7833     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7834     //
7835     // map(to: s.f[:22]) map(from: s.p[:33])
7836     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7837     //     sizeof(double*) (**), TARGET_PARAM
7838     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7839     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7840     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7841     // (*) allocate contiguous space needed to fit all mapped members even if
7842     //     we allocate space for members not mapped (in this example,
7843     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7844     //     them as well because they fall between &s.f[0] and &s.p)
7845     //
7846     // map(from: s.f[:22]) map(to: ps->p[:33])
7847     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7848     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7849     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7850     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7851     // (*) the struct this entry pertains to is the 2nd element in the list of
7852     //     arguments, hence MEMBER_OF(2)
7853     //
7854     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7855     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7856     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7857     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7858     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7859     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7860     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7861     // (*) the struct this entry pertains to is the 4th element in the list
7862     //     of arguments, hence MEMBER_OF(4)
7863 
7864     // Track if the map information being generated is the first for a capture.
7865     bool IsCaptureFirstInfo = IsFirstComponentList;
7866     // When the variable is on a declare target link or in a to clause with
7867     // unified memory, a reference is needed to hold the host/device address
7868     // of the variable.
7869     bool RequiresReference = false;
7870 
7871     // Scan the components from the base to the complete expression.
7872     auto CI = Components.rbegin();
7873     auto CE = Components.rend();
7874     auto I = CI;
7875 
7876     // Track if the map information being generated is the first for a list of
7877     // components.
7878     bool IsExpressionFirstInfo = true;
7879     bool FirstPointerInComplexData = false;
7880     Address BP = Address::invalid();
7881     const Expr *AssocExpr = I->getAssociatedExpression();
7882     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7883     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7884     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7885 
7886     if (isa<MemberExpr>(AssocExpr)) {
7887       // The base is the 'this' pointer. The content of the pointer is going
7888       // to be the base of the field being mapped.
7889       BP = CGF.LoadCXXThisAddress();
7890     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7891                (OASE &&
7892                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7893       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7894     } else if (OAShE &&
7895                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7896       BP = Address(
7897           CGF.EmitScalarExpr(OAShE->getBase()),
7898           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7899     } else {
7900       // The base is the reference to the variable.
7901       // BP = &Var.
7902       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7903       if (const auto *VD =
7904               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7905         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7906                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7907           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7908               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7909                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7910             RequiresReference = true;
7911             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7912           }
7913         }
7914       }
7915 
7916       // If the variable is a pointer and is being dereferenced (i.e. is not
7917       // the last component), the base has to be the pointer itself, not its
7918       // reference. References are ignored for mapping purposes.
7919       QualType Ty =
7920           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7921       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7922         // No need to generate individual map information for the pointer, it
7923         // can be associated with the combined storage if shared memory mode is
7924         // active or the base declaration is not global variable.
7925         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7926         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7927             !VD || VD->hasLocalStorage())
7928           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7929         else
7930           FirstPointerInComplexData = true;
7931         ++I;
7932       }
7933     }
7934 
7935     // Track whether a component of the list should be marked as MEMBER_OF some
7936     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7937     // in a component list should be marked as MEMBER_OF, all subsequent entries
7938     // do not belong to the base struct. E.g.
7939     // struct S2 s;
7940     // s.ps->ps->ps->f[:]
7941     //   (1) (2) (3) (4)
7942     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7943     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7944     // is the pointee of ps(2) which is not member of struct s, so it should not
7945     // be marked as such (it is still PTR_AND_OBJ).
7946     // The variable is initialized to false so that PTR_AND_OBJ entries which
7947     // are not struct members are not considered (e.g. array of pointers to
7948     // data).
7949     bool ShouldBeMemberOf = false;
7950 
7951     // Variable keeping track of whether or not we have encountered a component
7952     // in the component list which is a member expression. Useful when we have a
7953     // pointer or a final array section, in which case it is the previous
7954     // component in the list which tells us whether we have a member expression.
7955     // E.g. X.f[:]
7956     // While processing the final array section "[:]" it is "f" which tells us
7957     // whether we are dealing with a member of a declared struct.
7958     const MemberExpr *EncounteredME = nullptr;
7959 
7960     // Track for the total number of dimension. Start from one for the dummy
7961     // dimension.
7962     uint64_t DimSize = 1;
7963 
7964     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7965     bool IsPrevMemberReference = false;
7966 
7967     for (; I != CE; ++I) {
7968       // If the current component is member of a struct (parent struct) mark it.
7969       if (!EncounteredME) {
7970         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7971         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7972         // as MEMBER_OF the parent struct.
7973         if (EncounteredME) {
7974           ShouldBeMemberOf = true;
7975           // Do not emit as complex pointer if this is actually not array-like
7976           // expression.
7977           if (FirstPointerInComplexData) {
7978             QualType Ty = std::prev(I)
7979                               ->getAssociatedDeclaration()
7980                               ->getType()
7981                               .getNonReferenceType();
7982             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7983             FirstPointerInComplexData = false;
7984           }
7985         }
7986       }
7987 
7988       auto Next = std::next(I);
7989 
7990       // We need to generate the addresses and sizes if this is the last
7991       // component, if the component is a pointer or if it is an array section
7992       // whose length can't be proved to be one. If this is a pointer, it
7993       // becomes the base address for the following components.
7994 
7995       // A final array section, is one whose length can't be proved to be one.
7996       // If the map item is non-contiguous then we don't treat any array section
7997       // as final array section.
7998       bool IsFinalArraySection =
7999           !IsNonContiguous &&
8000           isFinalArraySectionExpression(I->getAssociatedExpression());
8001 
8002       // If we have a declaration for the mapping use that, otherwise use
8003       // the base declaration of the map clause.
8004       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8005                                      ? I->getAssociatedDeclaration()
8006                                      : BaseDecl;
8007       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8008                                                : MapExpr;
8009 
8010       // Get information on whether the element is a pointer. Have to do a
8011       // special treatment for array sections given that they are built-in
8012       // types.
8013       const auto *OASE =
8014           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8015       const auto *OAShE =
8016           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8017       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8018       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8019       bool IsPointer =
8020           OAShE ||
8021           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8022                        .getCanonicalType()
8023                        ->isAnyPointerType()) ||
8024           I->getAssociatedExpression()->getType()->isAnyPointerType();
8025       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8026                                MapDecl &&
8027                                MapDecl->getType()->isLValueReferenceType();
8028       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8029 
8030       if (OASE)
8031         ++DimSize;
8032 
8033       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8034           IsFinalArraySection) {
8035         // If this is not the last component, we expect the pointer to be
8036         // associated with an array expression or member expression.
8037         assert((Next == CE ||
8038                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8039                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8040                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8041                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8042                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8043                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8044                "Unexpected expression");
8045 
8046         Address LB = Address::invalid();
8047         Address LowestElem = Address::invalid();
8048         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8049                                        const MemberExpr *E) {
8050           const Expr *BaseExpr = E->getBase();
8051           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8052           // scalar.
8053           LValue BaseLV;
8054           if (E->isArrow()) {
8055             LValueBaseInfo BaseInfo;
8056             TBAAAccessInfo TBAAInfo;
8057             Address Addr =
8058                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8059             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8060             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8061           } else {
8062             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8063           }
8064           return BaseLV;
8065         };
8066         if (OAShE) {
8067           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8068                                     CGF.getContext().getTypeAlignInChars(
8069                                         OAShE->getBase()->getType()));
8070         } else if (IsMemberReference) {
8071           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8072           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8073           LowestElem = CGF.EmitLValueForFieldInitialization(
8074                               BaseLVal, cast<FieldDecl>(MapDecl))
8075                            .getAddress(CGF);
8076           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8077                    .getAddress(CGF);
8078         } else {
8079           LowestElem = LB =
8080               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8081                   .getAddress(CGF);
8082         }
8083 
8084         // If this component is a pointer inside the base struct then we don't
8085         // need to create any entry for it - it will be combined with the object
8086         // it is pointing to into a single PTR_AND_OBJ entry.
8087         bool IsMemberPointerOrAddr =
8088             EncounteredME &&
8089             (((IsPointer || ForDeviceAddr) &&
8090               I->getAssociatedExpression() == EncounteredME) ||
8091              (IsPrevMemberReference && !IsPointer) ||
8092              (IsMemberReference && Next != CE &&
8093               !Next->getAssociatedExpression()->getType()->isPointerType()));
8094         if (!OverlappedElements.empty() && Next == CE) {
8095           // Handle base element with the info for overlapped elements.
8096           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8097           assert(!IsPointer &&
8098                  "Unexpected base element with the pointer type.");
8099           // Mark the whole struct as the struct that requires allocation on the
8100           // device.
8101           PartialStruct.LowestElem = {0, LowestElem};
8102           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8103               I->getAssociatedExpression()->getType());
8104           Address HB = CGF.Builder.CreateConstGEP(
8105               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8106                                                               CGF.VoidPtrTy),
8107               TypeSize.getQuantity() - 1);
8108           PartialStruct.HighestElem = {
8109               std::numeric_limits<decltype(
8110                   PartialStruct.HighestElem.first)>::max(),
8111               HB};
8112           PartialStruct.Base = BP;
8113           PartialStruct.LB = LB;
8114           assert(
8115               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8116               "Overlapped elements must be used only once for the variable.");
8117           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8118           // Emit data for non-overlapped data.
8119           OpenMPOffloadMappingFlags Flags =
8120               OMP_MAP_MEMBER_OF |
8121               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8122                              /*AddPtrFlag=*/false,
8123                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8124           llvm::Value *Size = nullptr;
8125           // Do bitcopy of all non-overlapped structure elements.
8126           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8127                    Component : OverlappedElements) {
8128             Address ComponentLB = Address::invalid();
8129             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8130                  Component) {
8131               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8132                 const auto *FD = dyn_cast<FieldDecl>(VD);
8133                 if (FD && FD->getType()->isLValueReferenceType()) {
8134                   const auto *ME =
8135                       cast<MemberExpr>(MC.getAssociatedExpression());
8136                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8137                   ComponentLB =
8138                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8139                           .getAddress(CGF);
8140                 } else {
8141                   ComponentLB =
8142                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8143                           .getAddress(CGF);
8144                 }
8145                 Size = CGF.Builder.CreatePtrDiff(
8146                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8147                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8148                 break;
8149               }
8150             }
8151             assert(Size && "Failed to determine structure size");
8152             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8153             CombinedInfo.BasePointers.push_back(BP.getPointer());
8154             CombinedInfo.Pointers.push_back(LB.getPointer());
8155             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8156                 Size, CGF.Int64Ty, /*isSigned=*/true));
8157             CombinedInfo.Types.push_back(Flags);
8158             CombinedInfo.Mappers.push_back(nullptr);
8159             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8160                                                                       : 1);
8161             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8162           }
8163           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8164           CombinedInfo.BasePointers.push_back(BP.getPointer());
8165           CombinedInfo.Pointers.push_back(LB.getPointer());
8166           Size = CGF.Builder.CreatePtrDiff(
8167               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8168               CGF.EmitCastToVoidPtr(LB.getPointer()));
8169           CombinedInfo.Sizes.push_back(
8170               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8171           CombinedInfo.Types.push_back(Flags);
8172           CombinedInfo.Mappers.push_back(nullptr);
8173           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8174                                                                     : 1);
8175           break;
8176         }
8177         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8178         if (!IsMemberPointerOrAddr ||
8179             (Next == CE && MapType != OMPC_MAP_unknown)) {
8180           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8181           CombinedInfo.BasePointers.push_back(BP.getPointer());
8182           CombinedInfo.Pointers.push_back(LB.getPointer());
8183           CombinedInfo.Sizes.push_back(
8184               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8185           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8186                                                                     : 1);
8187 
8188           // If Mapper is valid, the last component inherits the mapper.
8189           bool HasMapper = Mapper && Next == CE;
8190           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8191 
8192           // We need to add a pointer flag for each map that comes from the
8193           // same expression except for the first one. We also need to signal
8194           // this map is the first one that relates with the current capture
8195           // (there is a set of entries for each capture).
8196           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8197               MapType, MapModifiers, MotionModifiers, IsImplicit,
8198               !IsExpressionFirstInfo || RequiresReference ||
8199                   FirstPointerInComplexData || IsMemberReference,
8200               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8201 
8202           if (!IsExpressionFirstInfo || IsMemberReference) {
8203             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8204             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8205             if (IsPointer || (IsMemberReference && Next != CE))
8206               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8207                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8208 
8209             if (ShouldBeMemberOf) {
8210               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8211               // should be later updated with the correct value of MEMBER_OF.
8212               Flags |= OMP_MAP_MEMBER_OF;
8213               // From now on, all subsequent PTR_AND_OBJ entries should not be
8214               // marked as MEMBER_OF.
8215               ShouldBeMemberOf = false;
8216             }
8217           }
8218 
8219           CombinedInfo.Types.push_back(Flags);
8220         }
8221 
8222         // If we have encountered a member expression so far, keep track of the
8223         // mapped member. If the parent is "*this", then the value declaration
8224         // is nullptr.
8225         if (EncounteredME) {
8226           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8227           unsigned FieldIndex = FD->getFieldIndex();
8228 
8229           // Update info about the lowest and highest elements for this struct
8230           if (!PartialStruct.Base.isValid()) {
8231             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8232             if (IsFinalArraySection) {
8233               Address HB =
8234                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8235                       .getAddress(CGF);
8236               PartialStruct.HighestElem = {FieldIndex, HB};
8237             } else {
8238               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8239             }
8240             PartialStruct.Base = BP;
8241             PartialStruct.LB = BP;
8242           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8243             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8244           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8245             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8246           }
8247         }
8248 
8249         // Need to emit combined struct for array sections.
8250         if (IsFinalArraySection || IsNonContiguous)
8251           PartialStruct.IsArraySection = true;
8252 
8253         // If we have a final array section, we are done with this expression.
8254         if (IsFinalArraySection)
8255           break;
8256 
8257         // The pointer becomes the base for the next element.
8258         if (Next != CE)
8259           BP = IsMemberReference ? LowestElem : LB;
8260 
8261         IsExpressionFirstInfo = false;
8262         IsCaptureFirstInfo = false;
8263         FirstPointerInComplexData = false;
8264         IsPrevMemberReference = IsMemberReference;
8265       } else if (FirstPointerInComplexData) {
8266         QualType Ty = Components.rbegin()
8267                           ->getAssociatedDeclaration()
8268                           ->getType()
8269                           .getNonReferenceType();
8270         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8271         FirstPointerInComplexData = false;
8272       }
8273     }
8274     // If ran into the whole component - allocate the space for the whole
8275     // record.
8276     if (!EncounteredME)
8277       PartialStruct.HasCompleteRecord = true;
8278 
8279     if (!IsNonContiguous)
8280       return;
8281 
8282     const ASTContext &Context = CGF.getContext();
8283 
8284     // For supporting stride in array section, we need to initialize the first
8285     // dimension size as 1, first offset as 0, and first count as 1
8286     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8287     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8288     MapValuesArrayTy CurStrides;
8289     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8290     uint64_t ElementTypeSize;
8291 
8292     // Collect Size information for each dimension and get the element size as
8293     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8295     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8296          Components) {
8297       const Expr *AssocExpr = Component.getAssociatedExpression();
8298       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8299 
8300       if (!OASE)
8301         continue;
8302 
8303       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8304       auto *CAT = Context.getAsConstantArrayType(Ty);
8305       auto *VAT = Context.getAsVariableArrayType(Ty);
8306 
8307       // We need all the dimension size except for the last dimension.
8308       assert((VAT || CAT || &Component == &*Components.begin()) &&
8309              "Should be either ConstantArray or VariableArray if not the "
8310              "first Component");
8311 
8312       // Get element size if CurStrides is empty.
8313       if (CurStrides.empty()) {
8314         const Type *ElementType = nullptr;
8315         if (CAT)
8316           ElementType = CAT->getElementType().getTypePtr();
8317         else if (VAT)
8318           ElementType = VAT->getElementType().getTypePtr();
8319         else
8320           assert(&Component == &*Components.begin() &&
8321                  "Only expect pointer (non CAT or VAT) when this is the "
8322                  "first Component");
8323         // If ElementType is null, then it means the base is a pointer
8324         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8325         // for next iteration.
8326         if (ElementType) {
8327           // For the case that having pointer as base, we need to remove one
8328           // level of indirection.
8329           if (&Component != &*Components.begin())
8330             ElementType = ElementType->getPointeeOrArrayElementType();
8331           ElementTypeSize =
8332               Context.getTypeSizeInChars(ElementType).getQuantity();
8333           CurStrides.push_back(
8334               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8335         }
8336       }
8337       // Get dimension value except for the last dimension since we don't need
8338       // it.
8339       if (DimSizes.size() < Components.size() - 1) {
8340         if (CAT)
8341           DimSizes.push_back(llvm::ConstantInt::get(
8342               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8343         else if (VAT)
8344           DimSizes.push_back(CGF.Builder.CreateIntCast(
8345               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8346               /*IsSigned=*/false));
8347       }
8348     }
8349 
    // Skip the dummy dimension since we already have its information.
8351     auto DI = DimSizes.begin() + 1;
8352     // Product of dimension.
8353     llvm::Value *DimProd =
8354         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8355 
8356     // Collect info for non-contiguous. Notice that offset, count, and stride
8357     // are only meaningful for array-section, so we insert a null for anything
8358     // other than array-section.
8359     // Also, the size of offset, count, and stride are not the same as
8360     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8361     // count, and stride are the same as the number of non-contiguous
8362     // declaration in target update to/from clause.
8363     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8364          Components) {
8365       const Expr *AssocExpr = Component.getAssociatedExpression();
8366 
8367       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8368         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8369             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8370             /*isSigned=*/false);
8371         CurOffsets.push_back(Offset);
8372         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8373         CurStrides.push_back(CurStrides.back());
8374         continue;
8375       }
8376 
8377       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8378 
8379       if (!OASE)
8380         continue;
8381 
8382       // Offset
8383       const Expr *OffsetExpr = OASE->getLowerBound();
8384       llvm::Value *Offset = nullptr;
8385       if (!OffsetExpr) {
8386         // If offset is absent, then we just set it to zero.
8387         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8388       } else {
8389         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8390                                            CGF.Int64Ty,
8391                                            /*isSigned=*/false);
8392       }
8393       CurOffsets.push_back(Offset);
8394 
8395       // Count
8396       const Expr *CountExpr = OASE->getLength();
8397       llvm::Value *Count = nullptr;
8398       if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are also constructed as array sections. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though, per the spec, it is not one.
8403         if (!OASE->getColonLocFirst().isValid() &&
8404             !OASE->getColonLocSecond().isValid()) {
8405           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8406         } else {
8407           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8408           // When the length is absent it defaults to ⌈(size −
8409           // lower-bound)/stride⌉, where size is the size of the array
8410           // dimension.
8411           const Expr *StrideExpr = OASE->getStride();
8412           llvm::Value *Stride =
8413               StrideExpr
8414                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8415                                               CGF.Int64Ty, /*isSigned=*/false)
8416                   : nullptr;
8417           if (Stride)
8418             Count = CGF.Builder.CreateUDiv(
8419                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8420           else
8421             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8422         }
8423       } else {
8424         Count = CGF.EmitScalarExpr(CountExpr);
8425       }
8426       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8427       CurCounts.push_back(Count);
8428 
8429       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8430       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8431       //              Offset      Count     Stride
8432       //    D0          0           1         4    (int)    <- dummy dimension
8433       //    D1          0           2         8    (2 * (1) * 4)
8434       //    D2          1           2         20   (1 * (1 * 5) * 4)
8435       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8436       const Expr *StrideExpr = OASE->getStride();
8437       llvm::Value *Stride =
8438           StrideExpr
8439               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8440                                           CGF.Int64Ty, /*isSigned=*/false)
8441               : nullptr;
8442       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8443       if (Stride)
8444         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8445       else
8446         CurStrides.push_back(DimProd);
8447       if (DI != DimSizes.end())
8448         ++DI;
8449     }
8450 
8451     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8452     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8453     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8454   }
8455 
8456   /// Return the adjusted map modifiers if the declaration a capture refers to
8457   /// appears in a first-private clause. This is expected to be used only with
8458   /// directives that start with 'target'.
8459   MappableExprsHandler::OpenMPOffloadMappingFlags
8460   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8461     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8462 
8463     // A first private variable captured by reference will use only the
8464     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8465     // declaration is known as first-private in this handler.
8466     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8467       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8468         return MappableExprsHandler::OMP_MAP_TO |
8469                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8470       return MappableExprsHandler::OMP_MAP_PRIVATE |
8471              MappableExprsHandler::OMP_MAP_TO;
8472     }
8473     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8474     if (I != LambdasMap.end())
8475       // for map(to: lambda): using user specified map type.
8476       return getMapTypeBits(
8477           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8478           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8479           /*AddPtrFlag=*/false,
8480           /*AddIsTargetParamFlag=*/false,
8481           /*isNonContiguous=*/false);
8482     return MappableExprsHandler::OMP_MAP_TO |
8483            MappableExprsHandler::OMP_MAP_FROM;
8484   }
8485 
8486   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8487     // Rotate by getFlagMemberOffset() bits.
8488     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8489                                                   << getFlagMemberOffset());
8490   }
8491 
8492   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8493                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8494     // If the entry is PTR_AND_OBJ but has not been marked with the special
8495     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8496     // marked as MEMBER_OF.
8497     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8498         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8499       return;
8500 
8501     // Reset the placeholder value to prepare the flag for the assignment of the
8502     // proper MEMBER_OF value.
8503     Flags &= ~OMP_MAP_MEMBER_OF;
8504     Flags |= MemberOfFlag;
8505   }
8506 
8507   void getPlainLayout(const CXXRecordDecl *RD,
8508                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8509                       bool AsBase) const {
8510     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8511 
8512     llvm::StructType *St =
8513         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8514 
8515     unsigned NumElements = St->getNumElements();
8516     llvm::SmallVector<
8517         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8518         RecordLayout(NumElements);
8519 
8520     // Fill bases.
8521     for (const auto &I : RD->bases()) {
8522       if (I.isVirtual())
8523         continue;
8524       const auto *Base = I.getType()->getAsCXXRecordDecl();
8525       // Ignore empty bases.
8526       if (Base->isEmpty() || CGF.getContext()
8527                                  .getASTRecordLayout(Base)
8528                                  .getNonVirtualSize()
8529                                  .isZero())
8530         continue;
8531 
8532       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8533       RecordLayout[FieldIndex] = Base;
8534     }
8535     // Fill in virtual bases.
8536     for (const auto &I : RD->vbases()) {
8537       const auto *Base = I.getType()->getAsCXXRecordDecl();
8538       // Ignore empty bases.
8539       if (Base->isEmpty())
8540         continue;
8541       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8542       if (RecordLayout[FieldIndex])
8543         continue;
8544       RecordLayout[FieldIndex] = Base;
8545     }
8546     // Fill in all the fields.
8547     assert(!RD->isUnion() && "Unexpected union.");
8548     for (const auto *Field : RD->fields()) {
8549       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8550       // will fill in later.)
8551       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8552         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8553         RecordLayout[FieldIndex] = Field;
8554       }
8555     }
8556     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8557              &Data : RecordLayout) {
8558       if (Data.isNull())
8559         continue;
8560       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8561         getPlainLayout(Base, Layout, /*AsBase=*/true);
8562       else
8563         Layout.push_back(Data.get<const FieldDecl *>());
8564     }
8565   }
8566 
8567   /// Generate all the base pointers, section pointers, sizes, map types, and
8568   /// mappers for the extracted mappable expressions (all included in \a
8569   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8570   /// pair of the relevant declaration and index where it occurs is appended to
8571   /// the device pointers info array.
8572   void generateAllInfoForClauses(
8573       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8574       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8575           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8576     // We have to process the component lists that relate with the same
8577     // declaration in a single chunk so that we can generate the map flags
8578     // correctly. Therefore, we organize all lists in a map.
8579     enum MapKind { Present, Allocs, Other, Total };
8580     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8581                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8582         Info;
8583 
8584     // Helper function to fill the information map for the different supported
8585     // clauses.
8586     auto &&InfoGen =
8587         [&Info, &SkipVarSet](
8588             const ValueDecl *D, MapKind Kind,
8589             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8590             OpenMPMapClauseKind MapType,
8591             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8592             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8593             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8594             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8595           if (SkipVarSet.contains(D))
8596             return;
8597           auto It = Info.find(D);
8598           if (It == Info.end())
8599             It = Info
8600                      .insert(std::make_pair(
8601                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8602                      .first;
8603           It->second[Kind].emplace_back(
8604               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8605               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8606         };
8607 
8608     for (const auto *Cl : Clauses) {
8609       const auto *C = dyn_cast<OMPMapClause>(Cl);
8610       if (!C)
8611         continue;
8612       MapKind Kind = Other;
8613       if (llvm::is_contained(C->getMapTypeModifiers(),
8614                              OMPC_MAP_MODIFIER_present))
8615         Kind = Present;
8616       else if (C->getMapType() == OMPC_MAP_alloc)
8617         Kind = Allocs;
8618       const auto *EI = C->getVarRefs().begin();
8619       for (const auto L : C->component_lists()) {
8620         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8621         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8622                 C->getMapTypeModifiers(), llvm::None,
8623                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8624                 E);
8625         ++EI;
8626       }
8627     }
8628     for (const auto *Cl : Clauses) {
8629       const auto *C = dyn_cast<OMPToClause>(Cl);
8630       if (!C)
8631         continue;
8632       MapKind Kind = Other;
8633       if (llvm::is_contained(C->getMotionModifiers(),
8634                              OMPC_MOTION_MODIFIER_present))
8635         Kind = Present;
8636       const auto *EI = C->getVarRefs().begin();
8637       for (const auto L : C->component_lists()) {
8638         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8639                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8640                 C->isImplicit(), std::get<2>(L), *EI);
8641         ++EI;
8642       }
8643     }
8644     for (const auto *Cl : Clauses) {
8645       const auto *C = dyn_cast<OMPFromClause>(Cl);
8646       if (!C)
8647         continue;
8648       MapKind Kind = Other;
8649       if (llvm::is_contained(C->getMotionModifiers(),
8650                              OMPC_MOTION_MODIFIER_present))
8651         Kind = Present;
8652       const auto *EI = C->getVarRefs().begin();
8653       for (const auto L : C->component_lists()) {
8654         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8655                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8656                 C->isImplicit(), std::get<2>(L), *EI);
8657         ++EI;
8658       }
8659     }
8660 
8661     // Look at the use_device_ptr clause information and mark the existing map
8662     // entries as such. If there is no map information for an entry in the
8663     // use_device_ptr list, we create one with map type 'alloc' and zero size
8664     // section. It is the user fault if that was not mapped before. If there is
8665     // no map information and the pointer is a struct member, then we defer the
8666     // emission of that entry until the whole struct has been processed.
8667     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8668                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8669         DeferredInfo;
8670     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8671 
8672     for (const auto *Cl : Clauses) {
8673       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8674       if (!C)
8675         continue;
8676       for (const auto L : C->component_lists()) {
8677         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8678             std::get<1>(L);
8679         assert(!Components.empty() &&
8680                "Not expecting empty list of components!");
8681         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8682         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8683         const Expr *IE = Components.back().getAssociatedExpression();
8684         // If the first component is a member expression, we have to look into
8685         // 'this', which maps to null in the map of map information. Otherwise
8686         // look directly for the information.
8687         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8688 
8689         // We potentially have map information for this declaration already.
8690         // Look for the first set of components that refer to it.
8691         if (It != Info.end()) {
8692           bool Found = false;
8693           for (auto &Data : It->second) {
8694             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8695               return MI.Components.back().getAssociatedDeclaration() == VD;
8696             });
8697             // If we found a map entry, signal that the pointer has to be
8698             // returned and move on to the next declaration. Exclude cases where
8699             // the base pointer is mapped as array subscript, array section or
8700             // array shaping. The base address is passed as a pointer to base in
8701             // this case and cannot be used as a base for use_device_ptr list
8702             // item.
8703             if (CI != Data.end()) {
8704               auto PrevCI = std::next(CI->Components.rbegin());
8705               const auto *VarD = dyn_cast<VarDecl>(VD);
8706               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8707                   isa<MemberExpr>(IE) ||
8708                   !VD->getType().getNonReferenceType()->isPointerType() ||
8709                   PrevCI == CI->Components.rend() ||
8710                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8711                   VarD->hasLocalStorage()) {
8712                 CI->ReturnDevicePointer = true;
8713                 Found = true;
8714                 break;
8715               }
8716             }
8717           }
8718           if (Found)
8719             continue;
8720         }
8721 
8722         // We didn't find any match in our map information - generate a zero
8723         // size array section - if the pointer is a struct member we defer this
8724         // action until the whole struct has been processed.
8725         if (isa<MemberExpr>(IE)) {
8726           // Insert the pointer into Info to be processed by
8727           // generateInfoForComponentList. Because it is a member pointer
8728           // without a pointee, no entry will be generated for it, therefore
8729           // we need to generate one after the whole struct has been processed.
8730           // Nonetheless, generateInfoForComponentList must be called to take
8731           // the pointer into account for the calculation of the range of the
8732           // partial struct.
8733           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8734                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8735                   nullptr);
8736           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8737         } else {
8738           llvm::Value *Ptr =
8739               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8740           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8741           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8742           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8743           UseDevicePtrCombinedInfo.Sizes.push_back(
8744               llvm::Constant::getNullValue(CGF.Int64Ty));
8745           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8746           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8747         }
8748       }
8749     }
8750 
8751     // Look at the use_device_addr clause information and mark the existing map
8752     // entries as such. If there is no map information for an entry in the
8753     // use_device_addr list, we create one with map type 'alloc' and zero size
8754     // section. It is the user fault if that was not mapped before. If there is
8755     // no map information and the pointer is a struct member, then we defer the
8756     // emission of that entry until the whole struct has been processed.
8757     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8758     for (const auto *Cl : Clauses) {
8759       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8760       if (!C)
8761         continue;
8762       for (const auto L : C->component_lists()) {
8763         assert(!std::get<1>(L).empty() &&
8764                "Not expecting empty list of components!");
8765         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8766         if (!Processed.insert(VD).second)
8767           continue;
8768         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8769         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8770         // If the first component is a member expression, we have to look into
8771         // 'this', which maps to null in the map of map information. Otherwise
8772         // look directly for the information.
8773         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8774 
8775         // We potentially have map information for this declaration already.
8776         // Look for the first set of components that refer to it.
8777         if (It != Info.end()) {
8778           bool Found = false;
8779           for (auto &Data : It->second) {
8780             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8781               return MI.Components.back().getAssociatedDeclaration() == VD;
8782             });
8783             // If we found a map entry, signal that the pointer has to be
8784             // returned and move on to the next declaration.
8785             if (CI != Data.end()) {
8786               CI->ReturnDevicePointer = true;
8787               Found = true;
8788               break;
8789             }
8790           }
8791           if (Found)
8792             continue;
8793         }
8794 
8795         // We didn't find any match in our map information - generate a zero
8796         // size array section - if the pointer is a struct member we defer this
8797         // action until the whole struct has been processed.
8798         if (isa<MemberExpr>(IE)) {
8799           // Insert the pointer into Info to be processed by
8800           // generateInfoForComponentList. Because it is a member pointer
8801           // without a pointee, no entry will be generated for it, therefore
8802           // we need to generate one after the whole struct has been processed.
8803           // Nonetheless, generateInfoForComponentList must be called to take
8804           // the pointer into account for the calculation of the range of the
8805           // partial struct.
8806           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8807                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8808                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8809           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8810         } else {
8811           llvm::Value *Ptr;
8812           if (IE->isGLValue())
8813             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8814           else
8815             Ptr = CGF.EmitScalarExpr(IE);
8816           CombinedInfo.Exprs.push_back(VD);
8817           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8818           CombinedInfo.Pointers.push_back(Ptr);
8819           CombinedInfo.Sizes.push_back(
8820               llvm::Constant::getNullValue(CGF.Int64Ty));
8821           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8822           CombinedInfo.Mappers.push_back(nullptr);
8823         }
8824       }
8825     }
8826 
8827     for (const auto &Data : Info) {
8828       StructRangeInfoTy PartialStruct;
8829       // Temporary generated information.
8830       MapCombinedInfoTy CurInfo;
8831       const Decl *D = Data.first;
8832       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8833       for (const auto &M : Data.second) {
8834         for (const MapInfo &L : M) {
8835           assert(!L.Components.empty() &&
8836                  "Not expecting declaration with no component lists.");
8837 
8838           // Remember the current base pointer index.
8839           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8840           CurInfo.NonContigInfo.IsNonContiguous =
8841               L.Components.back().isNonContiguous();
8842           generateInfoForComponentList(
8843               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8844               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8845               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8846 
8847           // If this entry relates with a device pointer, set the relevant
8848           // declaration and add the 'return pointer' flag.
8849           if (L.ReturnDevicePointer) {
8850             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8851                    "Unexpected number of mapped base pointers.");
8852 
8853             const ValueDecl *RelevantVD =
8854                 L.Components.back().getAssociatedDeclaration();
8855             assert(RelevantVD &&
8856                    "No relevant declaration related with device pointer??");
8857 
8858             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8859                 RelevantVD);
8860             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8861           }
8862         }
8863       }
8864 
8865       // Append any pending zero-length pointers which are struct members and
8866       // used with use_device_ptr or use_device_addr.
8867       auto CI = DeferredInfo.find(Data.first);
8868       if (CI != DeferredInfo.end()) {
8869         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8870           llvm::Value *BasePtr;
8871           llvm::Value *Ptr;
8872           if (L.ForDeviceAddr) {
8873             if (L.IE->isGLValue())
8874               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8875             else
8876               Ptr = this->CGF.EmitScalarExpr(L.IE);
8877             BasePtr = Ptr;
8878             // Entry is RETURN_PARAM. Also, set the placeholder value
8879             // MEMBER_OF=FFFF so that the entry is later updated with the
8880             // correct value of MEMBER_OF.
8881             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8882           } else {
8883             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8884             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8885                                              L.IE->getExprLoc());
8886             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8887             // placeholder value MEMBER_OF=FFFF so that the entry is later
8888             // updated with the correct value of MEMBER_OF.
8889             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8890                                     OMP_MAP_MEMBER_OF);
8891           }
8892           CurInfo.Exprs.push_back(L.VD);
8893           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8894           CurInfo.Pointers.push_back(Ptr);
8895           CurInfo.Sizes.push_back(
8896               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8897           CurInfo.Mappers.push_back(nullptr);
8898         }
8899       }
8900       // If there is an entry in PartialStruct it means we have a struct with
8901       // individual members mapped. Emit an extra combined entry.
8902       if (PartialStruct.Base.isValid()) {
8903         CurInfo.NonContigInfo.Dims.push_back(0);
8904         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8905       }
8906 
8907       // We need to append the results of this capture to what we already
8908       // have.
8909       CombinedInfo.append(CurInfo);
8910     }
8911     // Append data for use_device_ptr clauses.
8912     CombinedInfo.append(UseDevicePtrCombinedInfo);
8913   }
8914 
8915 public:
8916   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8917       : CurDir(&Dir), CGF(CGF) {
8918     // Extract firstprivate clause information.
8919     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8920       for (const auto *D : C->varlists())
8921         FirstPrivateDecls.try_emplace(
8922             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8923     // Extract implicit firstprivates from uses_allocators clauses.
8924     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8925       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8926         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8927         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8928           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8929                                         /*Implicit=*/true);
8930         else if (const auto *VD = dyn_cast<VarDecl>(
8931                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8932                          ->getDecl()))
8933           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8934       }
8935     }
8936     // Extract device pointer clause information.
8937     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8938       for (auto L : C->component_lists())
8939         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8940     // Extract map information.
8941     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8942       if (C->getMapType() != OMPC_MAP_to)
8943         continue;
8944       for (auto L : C->component_lists()) {
8945         const ValueDecl *VD = std::get<0>(L);
8946         const auto *RD = VD ? VD->getType()
8947                                   .getCanonicalType()
8948                                   .getNonReferenceType()
8949                                   ->getAsCXXRecordDecl()
8950                             : nullptr;
8951         if (RD && RD->isLambda())
8952           LambdasMap.try_emplace(std::get<0>(L), C);
8953       }
8954     }
8955   }
8956 
  /// Constructor for the declare mapper directive. Only records the current
  /// directive; no clause pre-scanning is performed for mappers.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8960 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Receives the extra combined entry covering the whole
  ///        mapped range of the struct.
  /// \param CurTypes Map-type flags of the entries generated so far for this
  ///        capture; updated in place (TARGET_PARAM removal, OMPX_HOLD
  ///        propagation, MEMBER_OF placeholder rewrite).
  /// \param PartialStruct Base address and lowest/highest mapped element
  ///        gathered while mapping the individual members.
  /// \param VD The declaration this entry relates to, if any.
  /// \param NotTargetParams If true, the combined entry is emitted with
  ///        OMP_MAP_NONE instead of OMP_MAP_TARGET_PARAM.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a struct member (and no array section is
    // involved) needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the complete record was mapped, both bounds start at the record's
    // own begin address.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is TARGET_PARAM only when generating info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // emitted above now carries that role.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9032 
9033   /// Generate all the base pointers, section pointers, sizes, map types, and
9034   /// mappers for the extracted mappable expressions (all included in \a
9035   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9036   /// pair of the relevant declaration and index where it occurs is appended to
9037   /// the device pointers info array.
9038   void generateAllInfo(
9039       MapCombinedInfoTy &CombinedInfo,
9040       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9041           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9042     assert(CurDir.is<const OMPExecutableDirective *>() &&
9043            "Expect a executable directive");
9044     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9045     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9046   }
9047 
9048   /// Generate all the base pointers, section pointers, sizes, map types, and
9049   /// mappers for the extracted map clauses of user-defined mapper (all included
9050   /// in \a CombinedInfo).
9051   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9052     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9053            "Expect a declare mapper directive");
9054     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9055     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9056   }
9057 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD Declaration of the mapped variable; only processed if its
  ///        (non-reference) type is a lambda's closure record.
  /// \param Arg Host value used as the address of the lambda object.
  /// \param CombinedInfo Receives one PTR_AND_OBJ | LITERAL | MEMBER_OF |
  ///        IMPLICIT entry for a captured 'this' and for each by-reference
  ///        (or pointer-typed) capture.
  /// \param LambdaPointers Filled with capture-field address -> lambda base
  ///        address; consumed later by adjustMemberOfForLambdaCaptures to fix
  ///        up MEMBER_OF indices.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the variable is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' is emitted as a pointer-sized PTR_AND_OBJ entry.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this inner VD (the captured variable) shadows the parameter VD
      // (the lambda object).
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-value captures are
      // emitted here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by value: pass the loaded pointer with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9124 
9125   /// Set correct indices for lambdas captures.
9126   void adjustMemberOfForLambdaCaptures(
9127       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9128       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9129       MapFlagsArrayTy &Types) const {
9130     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9131       // Set correct member_of idx for all implicit lambda captures.
9132       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9133                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9134         continue;
9135       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9136       assert(BasePtr && "Unable to find base lambda address.");
9137       int TgtIdx = -1;
9138       for (unsigned J = I; J > 0; --J) {
9139         unsigned Idx = J - 1;
9140         if (Pointers[Idx] != BasePtr)
9141           continue;
9142         TgtIdx = Idx;
9143         break;
9144       }
9145       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9146       // All other current entries will be MEMBER_OF the combined entry
9147       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9148       // 0xFFFF in the MEMBER_OF field).
9149       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9150       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9151     }
9152   }
9153 
9154   /// Generate the base pointers, section pointers, sizes, map types, and
9155   /// mappers associated to a given capture (all included in \a CombinedInfo).
9156   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9157                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9158                               StructRangeInfoTy &PartialStruct) const {
9159     assert(!Cap->capturesVariableArrayType() &&
9160            "Not expecting to generate map info for a variable array type!");
9161 
9162     // We need to know when we generating information for the first component
9163     const ValueDecl *VD = Cap->capturesThis()
9164                               ? nullptr
9165                               : Cap->getCapturedVar()->getCanonicalDecl();
9166 
9167     // for map(to: lambda): skip here, processing it in
9168     // generateDefaultMapInfo
9169     if (LambdasMap.count(VD))
9170       return;
9171 
9172     // If this declaration appears in a is_device_ptr clause we just have to
9173     // pass the pointer by value. If it is a reference to a declaration, we just
9174     // pass its value.
9175     if (DevPointersMap.count(VD)) {
9176       CombinedInfo.Exprs.push_back(VD);
9177       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9178       CombinedInfo.Pointers.push_back(Arg);
9179       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9180           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9181           /*isSigned=*/true));
9182       CombinedInfo.Types.push_back(
9183           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9184           OMP_MAP_TARGET_PARAM);
9185       CombinedInfo.Mappers.push_back(nullptr);
9186       return;
9187     }
9188 
9189     using MapData =
9190         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9191                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9192                    const ValueDecl *, const Expr *>;
9193     SmallVector<MapData, 4> DeclComponentLists;
9194     assert(CurDir.is<const OMPExecutableDirective *>() &&
9195            "Expect a executable directive");
9196     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9197     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9198       const auto *EI = C->getVarRefs().begin();
9199       for (const auto L : C->decl_component_lists(VD)) {
9200         const ValueDecl *VDecl, *Mapper;
9201         // The Expression is not correct if the mapping is implicit
9202         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9203         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9204         std::tie(VDecl, Components, Mapper) = L;
9205         assert(VDecl == VD && "We got information for the wrong declaration??");
9206         assert(!Components.empty() &&
9207                "Not expecting declaration with no component lists.");
9208         DeclComponentLists.emplace_back(Components, C->getMapType(),
9209                                         C->getMapTypeModifiers(),
9210                                         C->isImplicit(), Mapper, E);
9211         ++EI;
9212       }
9213     }
9214     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9215                                              const MapData &RHS) {
9216       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9217       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9218       bool HasPresent =
9219           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9220       bool HasAllocs = MapType == OMPC_MAP_alloc;
9221       MapModifiers = std::get<2>(RHS);
9222       MapType = std::get<1>(LHS);
9223       bool HasPresentR =
9224           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9225       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9226       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9227     });
9228 
9229     // Find overlapping elements (including the offset from the base element).
9230     llvm::SmallDenseMap<
9231         const MapData *,
9232         llvm::SmallVector<
9233             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9234         4>
9235         OverlappedData;
9236     size_t Count = 0;
9237     for (const MapData &L : DeclComponentLists) {
9238       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9239       OpenMPMapClauseKind MapType;
9240       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9241       bool IsImplicit;
9242       const ValueDecl *Mapper;
9243       const Expr *VarRef;
9244       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9245           L;
9246       ++Count;
9247       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9248         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9249         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9250                  VarRef) = L1;
9251         auto CI = Components.rbegin();
9252         auto CE = Components.rend();
9253         auto SI = Components1.rbegin();
9254         auto SE = Components1.rend();
9255         for (; CI != CE && SI != SE; ++CI, ++SI) {
9256           if (CI->getAssociatedExpression()->getStmtClass() !=
9257               SI->getAssociatedExpression()->getStmtClass())
9258             break;
9259           // Are we dealing with different variables/fields?
9260           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9261             break;
9262         }
9263         // Found overlapping if, at least for one component, reached the head
9264         // of the components list.
9265         if (CI == CE || SI == SE) {
9266           // Ignore it if it is the same component.
9267           if (CI == CE && SI == SE)
9268             continue;
9269           const auto It = (SI == SE) ? CI : SI;
9270           // If one component is a pointer and another one is a kind of
9271           // dereference of this pointer (array subscript, section, dereference,
9272           // etc.), it is not an overlapping.
9273           // Same, if one component is a base and another component is a
9274           // dereferenced pointer memberexpr with the same base.
9275           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9276               (std::prev(It)->getAssociatedDeclaration() &&
9277                std::prev(It)
9278                    ->getAssociatedDeclaration()
9279                    ->getType()
9280                    ->isPointerType()) ||
9281               (It->getAssociatedDeclaration() &&
9282                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9283                std::next(It) != CE && std::next(It) != SE))
9284             continue;
9285           const MapData &BaseData = CI == CE ? L : L1;
9286           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9287               SI == SE ? Components : Components1;
9288           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9289           OverlappedElements.getSecond().push_back(SubData);
9290         }
9291       }
9292     }
9293     // Sort the overlapped elements for each item.
9294     llvm::SmallVector<const FieldDecl *, 4> Layout;
9295     if (!OverlappedData.empty()) {
9296       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9297       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9298       while (BaseType != OrigType) {
9299         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9300         OrigType = BaseType->getPointeeOrArrayElementType();
9301       }
9302 
9303       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9304         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9305       else {
9306         const auto *RD = BaseType->getAsRecordDecl();
9307         Layout.append(RD->field_begin(), RD->field_end());
9308       }
9309     }
9310     for (auto &Pair : OverlappedData) {
9311       llvm::stable_sort(
9312           Pair.getSecond(),
9313           [&Layout](
9314               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9315               OMPClauseMappableExprCommon::MappableExprComponentListRef
9316                   Second) {
9317             auto CI = First.rbegin();
9318             auto CE = First.rend();
9319             auto SI = Second.rbegin();
9320             auto SE = Second.rend();
9321             for (; CI != CE && SI != SE; ++CI, ++SI) {
9322               if (CI->getAssociatedExpression()->getStmtClass() !=
9323                   SI->getAssociatedExpression()->getStmtClass())
9324                 break;
9325               // Are we dealing with different variables/fields?
9326               if (CI->getAssociatedDeclaration() !=
9327                   SI->getAssociatedDeclaration())
9328                 break;
9329             }
9330 
9331             // Lists contain the same elements.
9332             if (CI == CE && SI == SE)
9333               return false;
9334 
9335             // List with less elements is less than list with more elements.
9336             if (CI == CE || SI == SE)
9337               return CI == CE;
9338 
9339             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9340             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9341             if (FD1->getParent() == FD2->getParent())
9342               return FD1->getFieldIndex() < FD2->getFieldIndex();
9343             const auto *It =
9344                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9345                   return FD == FD1 || FD == FD2;
9346                 });
9347             return *It == FD1;
9348           });
9349     }
9350 
9351     // Associated with a capture, because the mapping flags depend on it.
9352     // Go through all of the elements with the overlapped elements.
9353     bool IsFirstComponentList = true;
9354     for (const auto &Pair : OverlappedData) {
9355       const MapData &L = *Pair.getFirst();
9356       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9357       OpenMPMapClauseKind MapType;
9358       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9359       bool IsImplicit;
9360       const ValueDecl *Mapper;
9361       const Expr *VarRef;
9362       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9363           L;
9364       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9365           OverlappedComponents = Pair.getSecond();
9366       generateInfoForComponentList(
9367           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9368           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9369           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9370       IsFirstComponentList = false;
9371     }
9372     // Go through other elements without overlapped elements.
9373     for (const MapData &L : DeclComponentLists) {
9374       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9375       OpenMPMapClauseKind MapType;
9376       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9377       bool IsImplicit;
9378       const ValueDecl *Mapper;
9379       const Expr *VarRef;
9380       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9381           L;
9382       auto It = OverlappedData.find(&L);
9383       if (It == OverlappedData.end())
9384         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9385                                      Components, CombinedInfo, PartialStruct,
9386                                      IsFirstComponentList, IsImplicit, Mapper,
9387                                      /*ForDeviceAddr=*/false, VD, VarRef);
9388       IsFirstComponentList = false;
9389     }
9390   }
9391 
9392   /// Generate the default map information for a given capture \a CI,
9393   /// record field declaration \a RI and captured value \a CV.
9394   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9395                               const FieldDecl &RI, llvm::Value *CV,
9396                               MapCombinedInfoTy &CombinedInfo) const {
9397     bool IsImplicit = true;
9398     // Do the default mapping.
9399     if (CI.capturesThis()) {
9400       CombinedInfo.Exprs.push_back(nullptr);
9401       CombinedInfo.BasePointers.push_back(CV);
9402       CombinedInfo.Pointers.push_back(CV);
9403       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9404       CombinedInfo.Sizes.push_back(
9405           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9406                                     CGF.Int64Ty, /*isSigned=*/true));
9407       // Default map type.
9408       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9409     } else if (CI.capturesVariableByCopy()) {
9410       const VarDecl *VD = CI.getCapturedVar();
9411       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9412       CombinedInfo.BasePointers.push_back(CV);
9413       CombinedInfo.Pointers.push_back(CV);
9414       if (!RI.getType()->isAnyPointerType()) {
9415         // We have to signal to the runtime captures passed by value that are
9416         // not pointers.
9417         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9418         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9419             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9420       } else {
9421         // Pointers are implicitly mapped with a zero size and no flags
9422         // (other than first map that is added for all implicit maps).
9423         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9424         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9425       }
9426       auto I = FirstPrivateDecls.find(VD);
9427       if (I != FirstPrivateDecls.end())
9428         IsImplicit = I->getSecond();
9429     } else {
9430       assert(CI.capturesVariable() && "Expected captured reference.");
9431       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9432       QualType ElementType = PtrTy->getPointeeType();
9433       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9434           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9435       // The default map type for a scalar/complex type is 'to' because by
9436       // default the value doesn't have to be retrieved. For an aggregate
9437       // type, the default is 'tofrom'.
9438       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9439       const VarDecl *VD = CI.getCapturedVar();
9440       auto I = FirstPrivateDecls.find(VD);
9441       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9442       CombinedInfo.BasePointers.push_back(CV);
9443       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9444         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9445             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9446             AlignmentSource::Decl));
9447         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9448       } else {
9449         CombinedInfo.Pointers.push_back(CV);
9450       }
9451       if (I != FirstPrivateDecls.end())
9452         IsImplicit = I->getSecond();
9453     }
9454     // Every default map produces a single argument which is a target parameter.
9455     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9456 
9457     // Add flag stating this is an implicit map.
9458     if (IsImplicit)
9459       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9460 
9461     // No user-defined mapper for default mapping.
9462     CombinedInfo.Mappers.push_back(nullptr);
9463   }
9464 };
9465 } // anonymous namespace
9466 
/// Emit the per-dimension "descriptor_dim" arrays that describe non-contiguous
/// data transfers to the runtime. For every mapped entry whose dimension count
/// is greater than one, an on-stack array of {offset, count, stride} triples
/// is built and its address is stored into the matching slot of
/// Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  // Build the implicit record type for descriptor_dim with three uint64_t
  // fields, matching the layout expected by the runtime.
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices into descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The descriptor array is filled with the dimensions in reverse order:
      // slot II receives the data of dimension EE - II - 1.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // Only advance the descriptor index when a descriptor was emitted for
    // this entry (entries with Dims == 1 were skipped above).
    ++L;
  }
}
9534 
9535 // Try to extract the base declaration from a `this->x` expression if possible.
9536 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9537   if (!E)
9538     return nullptr;
9539 
9540   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9541     if (const MemberExpr *ME =
9542             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9543       return ME->getMemberDecl();
9544   return nullptr;
9545 }
9546 
9547 /// Emit a string constant containing the names of the values mapped to the
9548 /// offloading runtime library.
9549 llvm::Constant *
9550 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9551                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9552 
9553   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9554     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9555 
9556   SourceLocation Loc;
9557   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9558     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9559       Loc = VD->getLocation();
9560     else
9561       Loc = MapExprs.getMapExpr()->getExprLoc();
9562   } else {
9563     Loc = MapExprs.getMapDecl()->getLocation();
9564   }
9565 
9566   std::string ExprName = "";
9567   if (MapExprs.getMapExpr()) {
9568     PrintingPolicy P(CGF.getContext().getLangOpts());
9569     llvm::raw_string_ostream OS(ExprName);
9570     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9571     OS.flush();
9572   } else {
9573     ExprName = MapExprs.getMapDecl()->getNameAsString();
9574   }
9575 
9576   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9577   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9578                                          PLoc.getLine(), PLoc.getColumn());
9579 }
9580 
9581 /// Emit the arrays used to pass the captures and map information to the
9582 /// offloading runtime library. If there is no map or capture information,
9583 /// return nullptr by reference.
9584 static void emitOffloadingArrays(
9585     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9586     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9587     bool IsNonContiguous = false) {
9588   CodeGenModule &CGM = CGF.CGM;
9589   ASTContext &Ctx = CGF.getContext();
9590 
9591   // Reset the array information.
9592   Info.clearArrayInfo();
9593   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9594 
9595   if (Info.NumberOfPtrs) {
9596     // Detect if we have any capture size requiring runtime evaluation of the
9597     // size so that a constant array could be eventually used.
9598     bool hasRuntimeEvaluationCaptureSize = false;
9599     for (llvm::Value *S : CombinedInfo.Sizes)
9600       if (!isa<llvm::Constant>(S)) {
9601         hasRuntimeEvaluationCaptureSize = true;
9602         break;
9603       }
9604 
9605     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9606     QualType PointerArrayType = Ctx.getConstantArrayType(
9607         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9608         /*IndexTypeQuals=*/0);
9609 
9610     Info.BasePointersArray =
9611         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9612     Info.PointersArray =
9613         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9614     Address MappersArray =
9615         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9616     Info.MappersArray = MappersArray.getPointer();
9617 
9618     // If we don't have any VLA types or other types that require runtime
9619     // evaluation, we can use a constant array for the map sizes, otherwise we
9620     // need to fill up the arrays as we do for the pointers.
9621     QualType Int64Ty =
9622         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9623     if (hasRuntimeEvaluationCaptureSize) {
9624       QualType SizeArrayType = Ctx.getConstantArrayType(
9625           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9626           /*IndexTypeQuals=*/0);
9627       Info.SizesArray =
9628           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9629     } else {
9630       // We expect all the sizes to be constant, so we collect them to create
9631       // a constant array.
9632       SmallVector<llvm::Constant *, 16> ConstSizes;
9633       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9634         if (IsNonContiguous &&
9635             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9636           ConstSizes.push_back(llvm::ConstantInt::get(
9637               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9638         } else {
9639           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9640         }
9641       }
9642 
9643       auto *SizesArrayInit = llvm::ConstantArray::get(
9644           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9645       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9646       auto *SizesArrayGbl = new llvm::GlobalVariable(
9647           CGM.getModule(), SizesArrayInit->getType(),
9648           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9649           SizesArrayInit, Name);
9650       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9651       Info.SizesArray = SizesArrayGbl;
9652     }
9653 
9654     // The map types are always constant so we don't need to generate code to
9655     // fill arrays. Instead, we create an array constant.
9656     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9657     llvm::copy(CombinedInfo.Types, Mapping.begin());
9658     std::string MaptypesName =
9659         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9660     auto *MapTypesArrayGbl =
9661         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9662     Info.MapTypesArray = MapTypesArrayGbl;
9663 
9664     // The information types are only built if there is debug information
9665     // requested.
9666     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9667       Info.MapNamesArray = llvm::Constant::getNullValue(
9668           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9669     } else {
9670       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9671         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9672       };
9673       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9674       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9675       std::string MapnamesName =
9676           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9677       auto *MapNamesArrayGbl =
9678           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9679       Info.MapNamesArray = MapNamesArrayGbl;
9680     }
9681 
9682     // If there's a present map type modifier, it must not be applied to the end
9683     // of a region, so generate a separate map type array in that case.
9684     if (Info.separateBeginEndCalls()) {
9685       bool EndMapTypesDiffer = false;
9686       for (uint64_t &Type : Mapping) {
9687         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9688           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9689           EndMapTypesDiffer = true;
9690         }
9691       }
9692       if (EndMapTypesDiffer) {
9693         MapTypesArrayGbl =
9694             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9695         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9696       }
9697     }
9698 
9699     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9700       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9701       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9702           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9703           Info.BasePointersArray, 0, I);
9704       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9705           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9706       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9707       CGF.Builder.CreateStore(BPVal, BPAddr);
9708 
9709       if (Info.requiresDevicePointerInfo())
9710         if (const ValueDecl *DevVD =
9711                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9712           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9713 
9714       llvm::Value *PVal = CombinedInfo.Pointers[I];
9715       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9716           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9717           Info.PointersArray, 0, I);
9718       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9719           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9720       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9721       CGF.Builder.CreateStore(PVal, PAddr);
9722 
9723       if (hasRuntimeEvaluationCaptureSize) {
9724         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9725             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9726             Info.SizesArray,
9727             /*Idx0=*/0,
9728             /*Idx1=*/I);
9729         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9730         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9731                                                           CGM.Int64Ty,
9732                                                           /*isSigned=*/true),
9733                                 SAddr);
9734       }
9735 
9736       // Fill up the mapper array.
9737       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9738       if (CombinedInfo.Mappers[I]) {
9739         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9740             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9741         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9742         Info.HasMapper = true;
9743       }
9744       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9745       CGF.Builder.CreateStore(MFunc, MAddr);
9746     }
9747   }
9748 
9749   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9750       Info.NumberOfPtrs == 0)
9751     return;
9752 
9753   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9754 }
9755 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map types for the end of the region instead of the
  /// beginning.
  bool ForEndCall = false;
  /// One constructor serves both the default form and the bool form;
  /// deliberately non-explicit so a bare bool still converts.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9764 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each offloading array to a pointer to its first element
    // (&array[0][0]) — the form the runtime entry points expect.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end of the region, use the alternate map-type array when one
    // was generated (see emitOffloadingArrays, which strips 'present').
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9825 
/// Check for inner distribute directive: if \a D is a target-related directive
/// whose single nested child is a 'distribute' directive (possibly one level
/// below an inner 'teams' region), return that nested directive; otherwise
/// return nullptr.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Only a body that reduces to a single statement can hold the nested
  // directive.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may directly wrap 'distribute' ...
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // ... or wrap a 'teams' region that in turn wraps 'distribute'; look
      // one level deeper in that case.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These target forms cannot contain a nested 'distribute'.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are never passed to this helper (per the
    // llvm_unreachable below).
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9935 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once; reuse the cached entry.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so the mapper body sees the current array element through it.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The six parameters
  // mirror the runtime mapper signature shown in \code above:
  // (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the mapper ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow this helper to be optimized even when the TU is built at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; second incoming value (PtrNext) is
  // added after the body is emitted, once LastBB is known.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position, so adding it below sets each component's MEMBER_OF field.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    // A four-way branch over the incoming TO/FROM bits; all paths join at
    // EndBB where a PHI picks the adjusted map type.
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The tofrom path falls through from ToElseBB with MemberMapType intact.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the function, and record it against the function currently being
  // emitted (if any) so the caller's bookkeeping can find it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10216 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the incoming map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10285 
10286 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10287     const OMPDeclareMapperDecl *D) {
10288   auto I = UDMMap.find(D);
10289   if (I != UDMMap.end())
10290     return I->second;
10291   emitUserDefinedMapper(D);
10292   return UDMMap.lookup(D);
10293 }
10294 
/// Emit a call to __kmpc_push_target_tripcount_mapper that passes the runtime
/// the loop trip count of the (teams) distribute loop associated with the
/// target directive \p D, computed via \p SizeEmitter. If no suitable loop
/// directive can be found (directly on \p D or nested inside it), no call is
/// emitted.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. If \p D itself is
  // already a combined teams-distribute directive, use it directly.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // Emit the push-tripcount call lazily via emitInlinedDirective; the call is
  // skipped when SizeEmitter cannot produce an iteration count.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
10322 
10323 void CGOpenMPRuntime::emitTargetCall(
10324     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10325     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10326     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10327     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10328                                      const OMPLoopDirective &D)>
10329         SizeEmitter) {
10330   if (!CGF.HaveInsertPoint())
10331     return;
10332 
10333   assert(OutlinedFn && "Invalid outlined function!");
10334 
10335   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10336                                  D.hasClausesOfKind<OMPNowaitClause>();
10337   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10338   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10339   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10340                                             PrePostActionTy &) {
10341     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10342   };
10343   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10344 
10345   CodeGenFunction::OMPTargetDataInfo InputInfo;
10346   llvm::Value *MapTypesArray = nullptr;
10347   llvm::Value *MapNamesArray = nullptr;
10348   // Fill up the pointer arrays and transfer execution to the device.
10349   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10350                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10351                     &CapturedVars,
10352                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10353     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10354       // Reverse offloading is not supported, so just execute on the host.
10355       if (RequiresOuterTask) {
10356         CapturedVars.clear();
10357         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10358       }
10359       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10360       return;
10361     }
10362 
10363     // On top of the arrays that were filled up, the target offloading call
10364     // takes as arguments the device id as well as the host pointer. The host
10365     // pointer is used by the runtime library to identify the current target
10366     // region, so it only has to be unique and not necessarily point to
10367     // anything. It could be the pointer to the outlined function that
10368     // implements the target region, but we aren't using that so that the
10369     // compiler doesn't need to keep that, and could therefore inline the host
10370     // function if proven worthwhile during optimization.
10371 
10372     // From this point on, we need to have an ID of the target region defined.
10373     assert(OutlinedFnID && "Invalid outlined function ID!");
10374 
10375     // Emit device ID if any.
10376     llvm::Value *DeviceID;
10377     if (Device.getPointer()) {
10378       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10379               Device.getInt() == OMPC_DEVICE_device_num) &&
10380              "Expected device_num modifier.");
10381       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10382       DeviceID =
10383           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10384     } else {
10385       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10386     }
10387 
10388     // Emit the number of elements in the offloading arrays.
10389     llvm::Value *PointerNum =
10390         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10391 
10392     // Return value of the runtime offloading call.
10393     llvm::Value *Return;
10394 
10395     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10396     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10397 
10398     // Source location for the ident struct
10399     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10400 
10401     // Emit tripcount for the target loop-based directive.
10402     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10403 
10404     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10405     // The target region is an outlined function launched by the runtime
10406     // via calls __tgt_target() or __tgt_target_teams().
10407     //
10408     // __tgt_target() launches a target region with one team and one thread,
10409     // executing a serial region.  This master thread may in turn launch
10410     // more threads within its team upon encountering a parallel region,
10411     // however, no additional teams can be launched on the device.
10412     //
10413     // __tgt_target_teams() launches a target region with one or more teams,
10414     // each with one or more threads.  This call is required for target
10415     // constructs such as:
10416     //  'target teams'
10417     //  'target' / 'teams'
10418     //  'target teams distribute parallel for'
10419     //  'target parallel'
10420     // and so on.
10421     //
10422     // Note that on the host and CPU targets, the runtime implementation of
10423     // these calls simply call the outlined function without forking threads.
10424     // The outlined functions themselves have runtime calls to
10425     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10426     // the compiler in emitTeamsCall() and emitParallelCall().
10427     //
10428     // In contrast, on the NVPTX target, the implementation of
10429     // __tgt_target_teams() launches a GPU kernel with the requested number
10430     // of teams and threads so no additional calls to the runtime are required.
10431     if (NumTeams) {
10432       // If we have NumTeams defined this means that we have an enclosed teams
10433       // region. Therefore we also expect to have NumThreads defined. These two
10434       // values should be defined in the presence of a teams directive,
10435       // regardless of having any clauses associated. If the user is using teams
10436       // but no clauses, these two values will be the default that should be
10437       // passed to the runtime library - a 32-bit integer with the value zero.
10438       assert(NumThreads && "Thread limit expression should be available along "
10439                            "with number of teams.");
10440       SmallVector<llvm::Value *> OffloadingArgs = {
10441           RTLoc,
10442           DeviceID,
10443           OutlinedFnID,
10444           PointerNum,
10445           InputInfo.BasePointersArray.getPointer(),
10446           InputInfo.PointersArray.getPointer(),
10447           InputInfo.SizesArray.getPointer(),
10448           MapTypesArray,
10449           MapNamesArray,
10450           InputInfo.MappersArray.getPointer(),
10451           NumTeams,
10452           NumThreads};
10453       if (HasNowait) {
10454         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10455         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10456         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10457         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10458         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10459         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10460       }
10461       Return = CGF.EmitRuntimeCall(
10462           OMPBuilder.getOrCreateRuntimeFunction(
10463               CGM.getModule(), HasNowait
10464                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10465                                    : OMPRTL___tgt_target_teams_mapper),
10466           OffloadingArgs);
10467     } else {
10468       SmallVector<llvm::Value *> OffloadingArgs = {
10469           RTLoc,
10470           DeviceID,
10471           OutlinedFnID,
10472           PointerNum,
10473           InputInfo.BasePointersArray.getPointer(),
10474           InputInfo.PointersArray.getPointer(),
10475           InputInfo.SizesArray.getPointer(),
10476           MapTypesArray,
10477           MapNamesArray,
10478           InputInfo.MappersArray.getPointer()};
10479       if (HasNowait) {
10480         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10481         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10482         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10483         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10484         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10485         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10486       }
10487       Return = CGF.EmitRuntimeCall(
10488           OMPBuilder.getOrCreateRuntimeFunction(
10489               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10490                                          : OMPRTL___tgt_target_mapper),
10491           OffloadingArgs);
10492     }
10493 
10494     // Check the error code and execute the host version if required.
10495     llvm::BasicBlock *OffloadFailedBlock =
10496         CGF.createBasicBlock("omp_offload.failed");
10497     llvm::BasicBlock *OffloadContBlock =
10498         CGF.createBasicBlock("omp_offload.cont");
10499     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10500     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10501 
10502     CGF.EmitBlock(OffloadFailedBlock);
10503     if (RequiresOuterTask) {
10504       CapturedVars.clear();
10505       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10506     }
10507     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10508     CGF.EmitBranch(OffloadContBlock);
10509 
10510     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10511   };
10512 
10513   // Notify that the host version must be executed.
10514   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10515                     RequiresOuterTask](CodeGenFunction &CGF,
10516                                        PrePostActionTy &) {
10517     if (RequiresOuterTask) {
10518       CapturedVars.clear();
10519       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10520     }
10521     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10522   };
10523 
10524   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10525                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10526                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10527     // Fill up the arrays with all the captured variables.
10528     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10529 
10530     // Get mappable expression information.
10531     MappableExprsHandler MEHandler(D, CGF);
10532     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10533     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10534 
10535     auto RI = CS.getCapturedRecordDecl()->field_begin();
10536     auto *CV = CapturedVars.begin();
10537     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10538                                               CE = CS.capture_end();
10539          CI != CE; ++CI, ++RI, ++CV) {
10540       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10541       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10542 
10543       // VLA sizes are passed to the outlined region by copy and do not have map
10544       // information associated.
10545       if (CI->capturesVariableArrayType()) {
10546         CurInfo.Exprs.push_back(nullptr);
10547         CurInfo.BasePointers.push_back(*CV);
10548         CurInfo.Pointers.push_back(*CV);
10549         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10550             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10551         // Copy to the device as an argument. No need to retrieve it.
10552         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10553                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10554                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10555         CurInfo.Mappers.push_back(nullptr);
10556       } else {
10557         // If we have any information in the map clause, we use it, otherwise we
10558         // just do a default mapping.
10559         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10560         if (!CI->capturesThis())
10561           MappedVarSet.insert(CI->getCapturedVar());
10562         else
10563           MappedVarSet.insert(nullptr);
10564         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10565           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10566         // Generate correct mapping for variables captured by reference in
10567         // lambdas.
10568         if (CI->capturesVariable())
10569           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10570                                                   CurInfo, LambdaPointers);
10571       }
10572       // We expect to have at least an element of information for this capture.
10573       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10574              "Non-existing map pointer for capture!");
10575       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10576              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10577              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10578              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10579              "Inconsistent map information sizes!");
10580 
10581       // If there is an entry in PartialStruct it means we have a struct with
10582       // individual members mapped. Emit an extra combined entry.
10583       if (PartialStruct.Base.isValid()) {
10584         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10585         MEHandler.emitCombinedEntry(
10586             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10587             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10588       }
10589 
10590       // We need to append the results of this capture to what we already have.
10591       CombinedInfo.append(CurInfo);
10592     }
10593     // Adjust MEMBER_OF flags for the lambdas captures.
10594     MEHandler.adjustMemberOfForLambdaCaptures(
10595         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10596         CombinedInfo.Types);
10597     // Map any list items in a map clause that were not captures because they
10598     // weren't referenced within the construct.
10599     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10600 
10601     TargetDataInfo Info;
10602     // Fill up the arrays and create the arguments.
10603     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10604     emitOffloadingArraysArgument(
10605         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10606         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10607         {/*ForEndTask=*/false});
10608 
10609     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10610     InputInfo.BasePointersArray =
10611         Address(Info.BasePointersArray, CGM.getPointerAlign());
10612     InputInfo.PointersArray =
10613         Address(Info.PointersArray, CGM.getPointerAlign());
10614     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10615     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10616     MapTypesArray = Info.MapTypesArray;
10617     MapNamesArray = Info.MapNamesArray;
10618     if (RequiresOuterTask)
10619       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10620     else
10621       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10622   };
10623 
10624   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10625                              CodeGenFunction &CGF, PrePostActionTy &) {
10626     if (RequiresOuterTask) {
10627       CodeGenFunction::OMPTargetDataInfo InputInfo;
10628       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10629     } else {
10630       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10631     }
10632   };
10633 
10634   // If we have a target function ID it means that we need to support
10635   // offloading, otherwise, just execute on the host. We need to execute on host
10636   // regardless of the conditional in the if clause if, e.g., the user do not
10637   // specify target triples.
10638   if (OutlinedFnID) {
10639     if (IfCond) {
10640       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10641     } else {
10642       RegionCodeGenTy ThenRCG(TargetThenGen);
10643       ThenRCG(CGF);
10644     }
10645   } else {
10646     RegionCodeGenTy ElseRCG(TargetElseGen);
10647     ElseRCG(CGF);
10648   }
10649 }
10650 
// Recursively walk the statement tree rooted at \p S looking for OpenMP
// target execution directives and emit a device entry point (kernel) for
// each one found. \p ParentName is the mangled name of the enclosing
// function/ctor/dtor and participates in the kernel's unique naming.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // Compute the (device-id, file-id, line) triple that uniquely identifies
    // this target region's offload entry.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter. Every
    // combined 'target ...' directive has its own emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive,
    // so reaching this switch with one of them is a front-end invariant
    // violation.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: only their associated statement can
  // contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10801 
10802 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10803   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10804       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10805   if (!DevTy)
10806     return false;
10807   // Do not emit device_type(nohost) functions for the host.
10808   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10809     return true;
10810   // Do not emit device_type(host) functions for the device.
10811   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10812     return true;
10813   return false;
10814 }
10815 
10816 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10817   // If emitting code for the host, we do not process FD here. Instead we do
10818   // the normal code generation.
10819   if (!CGM.getLangOpts().OpenMPIsDevice) {
10820     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10821       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10822                                   CGM.getLangOpts().OpenMPIsDevice))
10823         return true;
10824     return false;
10825   }
10826 
10827   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10828   // Try to detect target regions in the function.
10829   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10830     StringRef Name = CGM.getMangledName(GD);
10831     scanForTargetRegionsFunctions(FD->getBody(), Name);
10832     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10833                                 CGM.getLangOpts().OpenMPIsDevice))
10834       return true;
10835   }
10836 
10837   // Do not to emit function if it is not marked as declare target.
10838   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10839          AlreadyEmittedTargetDecls.count(VD) == 0;
10840 }
10841 
// Returns true if the global variable described by \p GD must NOT be emitted
// by the normal code-generation path: it is excluded by a device_type clause,
// or (on the device) its emission is deferred until the deferred-decl pass.
// As a side effect, on the device this scans the variable's ctor/dtor bodies
// for target regions.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host the variable is emitted normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'link' variables, and 'to' variables under unified shared memory, are
  // not emitted now; record them for emitDeferredTargetDecls().
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10879 
// Register \p VD (with its emitted address \p Addr) in the offload entries
// table so host and device agree on the variable's entry. Declare-target
// 'to' variables are registered with their real size; 'link' variables (and
// 'to' under unified shared memory) are registered through a pointer-sized
// reference entry.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when not offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size 0 marks an entry without storage here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" holding the address and pin
        // it with llvm.compiler.used so the optimizer cannot drop it.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the runtime patches the entry; no address needed.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host register the indirection ("declare target var" pointer),
      // not the variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10961 
10962 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10963   if (isa<FunctionDecl>(GD.getDecl()) ||
10964       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10965     return emitTargetFunctions(GD);
10966 
10967   return emitTargetGlobalVariable(GD);
10968 }
10969 
10970 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10971   for (const VarDecl *VD : DeferredGlobalVariables) {
10972     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10973         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10974     if (!Res)
10975       continue;
10976     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10977         !HasRequiresUnifiedSharedMemory) {
10978       CGM.EmitGlobal(VD);
10979     } else {
10980       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10981               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10982                HasRequiresUnifiedSharedMemory)) &&
10983              "Expected link clause or to clause with unified memory.");
10984       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10985     }
10986   }
10987 }
10988 
10989 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10990     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10991   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10992          " Expected target-based directive.");
10993 }
10994 
10995 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10996   for (const OMPClause *Clause : D->clauselists()) {
10997     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10998       HasRequiresUnifiedSharedMemory = true;
10999     } else if (const auto *AC =
11000                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11001       switch (AC->getAtomicDefaultMemOrderKind()) {
11002       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11003         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11004         break;
11005       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11006         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11007         break;
11008       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11009         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11010         break;
11011       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11012         break;
11013       }
11014     }
11015   }
11016 }
11017 
// Returns the default atomic ordering established by a 'requires
// atomic_default_mem_order' clause (set in processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11021 
11022 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11023                                                        LangAS &AS) {
11024   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11025     return false;
11026   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11027   switch(A->getAllocatorType()) {
11028   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11029   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11030   // Not supported, fallback to the default mem space.
11031   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11032   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11033   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11034   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11035   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11036   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11037   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11038     AS = LangAS::Default;
11039     return true;
11040   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11041     llvm_unreachable("Expected predefined allocator for the variables with the "
11042                      "static storage.");
11043   }
11044   return false;
11045 }
11046 
// Returns true if a 'requires unified_shared_memory' directive was seen in
// this compilation (set in processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11050 
11051 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11052     CodeGenModule &CGM)
11053     : CGM(CGM) {
11054   if (CGM.getLangOpts().OpenMPIsDevice) {
11055     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11056     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11057   }
11058 }
11059 
11060 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11061   if (CGM.getLangOpts().OpenMPIsDevice)
11062     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11063 }
11064 
// Decide whether the function \p GD is already (or should be treated as)
// emitted for the device. Returns true when no further emission is needed;
// returns false exactly once per not-yet-emitted function so the caller
// emits it. Also records non-declare-target functions as emitted.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only relevant for device compilation with auto-marking enabled
  // (see DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A body exists but nothing was recorded: treat as emitted only if the
      // module already holds a definition under the mangled name.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target function: record it; the insert succeeds (returns
  // true) only the first time, which is when emission must happen.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
11084 
// Emit (for the host) a global-initializer function that registers the
// compilation unit's 'requires' flags with the offload runtime via
// __tgt_register_requires. Returns nullptr when no registration is needed
// (no target triples, simd-only mode, device compilation, or no target
// regions/entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Build a void() global-init function named
    // ".omp_offloading.requires_reg".
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Call __tgt_register_requires(flags) as an i64 bitmask.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11126 
11127 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11128                                     const OMPExecutableDirective &D,
11129                                     SourceLocation Loc,
11130                                     llvm::Function *OutlinedFn,
11131                                     ArrayRef<llvm::Value *> CapturedVars) {
11132   if (!CGF.HaveInsertPoint())
11133     return;
11134 
11135   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11136   CodeGenFunction::RunCleanupsScope Scope(CGF);
11137 
11138   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11139   llvm::Value *Args[] = {
11140       RTLoc,
11141       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11142       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11143   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11144   RealArgs.append(std::begin(Args), std::end(Args));
11145   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11146 
11147   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11148       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11149   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11150 }
11151 
11152 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11153                                          const Expr *NumTeams,
11154                                          const Expr *ThreadLimit,
11155                                          SourceLocation Loc) {
11156   if (!CGF.HaveInsertPoint())
11157     return;
11158 
11159   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11160 
11161   llvm::Value *NumTeamsVal =
11162       NumTeams
11163           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11164                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11165           : CGF.Builder.getInt32(0);
11166 
11167   llvm::Value *ThreadLimitVal =
11168       ThreadLimit
11169           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11170                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11171           : CGF.Builder.getInt32(0);
11172 
11173   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11174   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11175                                      ThreadLimitVal};
11176   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11177                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11178                       PushNumTeamsArgs);
11179 }
11180 
11181 void CGOpenMPRuntime::emitTargetDataCalls(
11182     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11183     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11184   if (!CGF.HaveInsertPoint())
11185     return;
11186 
11187   // Action used to replace the default codegen action and turn privatization
11188   // off.
11189   PrePostActionTy NoPrivAction;
11190 
11191   // Generate the code for the opening of the data environment. Capture all the
11192   // arguments of the runtime call by reference because they are used in the
11193   // closing of the region.
11194   auto &&BeginThenGen = [this, &D, Device, &Info,
11195                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11196     // Fill up the arrays with all the mapped variables.
11197     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11198 
11199     // Get map clause information.
11200     MappableExprsHandler MEHandler(D, CGF);
11201     MEHandler.generateAllInfo(CombinedInfo);
11202 
11203     // Fill up the arrays and create the arguments.
11204     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11205                          /*IsNonContiguous=*/true);
11206 
11207     llvm::Value *BasePointersArrayArg = nullptr;
11208     llvm::Value *PointersArrayArg = nullptr;
11209     llvm::Value *SizesArrayArg = nullptr;
11210     llvm::Value *MapTypesArrayArg = nullptr;
11211     llvm::Value *MapNamesArrayArg = nullptr;
11212     llvm::Value *MappersArrayArg = nullptr;
11213     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11214                                  SizesArrayArg, MapTypesArrayArg,
11215                                  MapNamesArrayArg, MappersArrayArg, Info);
11216 
11217     // Emit device ID if any.
11218     llvm::Value *DeviceID = nullptr;
11219     if (Device) {
11220       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11221                                            CGF.Int64Ty, /*isSigned=*/true);
11222     } else {
11223       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11224     }
11225 
11226     // Emit the number of elements in the offloading arrays.
11227     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11228     //
11229     // Source location for the ident struct
11230     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11231 
11232     llvm::Value *OffloadingArgs[] = {RTLoc,
11233                                      DeviceID,
11234                                      PointerNum,
11235                                      BasePointersArrayArg,
11236                                      PointersArrayArg,
11237                                      SizesArrayArg,
11238                                      MapTypesArrayArg,
11239                                      MapNamesArrayArg,
11240                                      MappersArrayArg};
11241     CGF.EmitRuntimeCall(
11242         OMPBuilder.getOrCreateRuntimeFunction(
11243             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11244         OffloadingArgs);
11245 
11246     // If device pointer privatization is required, emit the body of the region
11247     // here. It will have to be duplicated: with and without privatization.
11248     if (!Info.CaptureDeviceAddrMap.empty())
11249       CodeGen(CGF);
11250   };
11251 
11252   // Generate code for the closing of the data region.
11253   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11254                                                 PrePostActionTy &) {
11255     assert(Info.isValid() && "Invalid data environment closing arguments.");
11256 
11257     llvm::Value *BasePointersArrayArg = nullptr;
11258     llvm::Value *PointersArrayArg = nullptr;
11259     llvm::Value *SizesArrayArg = nullptr;
11260     llvm::Value *MapTypesArrayArg = nullptr;
11261     llvm::Value *MapNamesArrayArg = nullptr;
11262     llvm::Value *MappersArrayArg = nullptr;
11263     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11264                                  SizesArrayArg, MapTypesArrayArg,
11265                                  MapNamesArrayArg, MappersArrayArg, Info,
11266                                  {/*ForEndCall=*/true});
11267 
11268     // Emit device ID if any.
11269     llvm::Value *DeviceID = nullptr;
11270     if (Device) {
11271       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11272                                            CGF.Int64Ty, /*isSigned=*/true);
11273     } else {
11274       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11275     }
11276 
11277     // Emit the number of elements in the offloading arrays.
11278     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11279 
11280     // Source location for the ident struct
11281     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11282 
11283     llvm::Value *OffloadingArgs[] = {RTLoc,
11284                                      DeviceID,
11285                                      PointerNum,
11286                                      BasePointersArrayArg,
11287                                      PointersArrayArg,
11288                                      SizesArrayArg,
11289                                      MapTypesArrayArg,
11290                                      MapNamesArrayArg,
11291                                      MappersArrayArg};
11292     CGF.EmitRuntimeCall(
11293         OMPBuilder.getOrCreateRuntimeFunction(
11294             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11295         OffloadingArgs);
11296   };
11297 
11298   // If we need device pointer privatization, we need to emit the body of the
11299   // region with no privatization in the 'else' branch of the conditional.
11300   // Otherwise, we don't have to do anything.
11301   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11302                                                          PrePostActionTy &) {
11303     if (!Info.CaptureDeviceAddrMap.empty()) {
11304       CodeGen.setAction(NoPrivAction);
11305       CodeGen(CGF);
11306     }
11307   };
11308 
11309   // We don't have to do anything to close the region if the if clause evaluates
11310   // to false.
11311   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11312 
11313   if (IfCond) {
11314     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11315   } else {
11316     RegionCodeGenTy RCG(BeginThenGen);
11317     RCG(CGF);
11318   }
11319 
11320   // If we don't require privatization of device pointers, we emit the body in
11321   // between the runtime calls. This avoids duplicating the body code.
11322   if (Info.CaptureDeviceAddrMap.empty()) {
11323     CodeGen.setAction(NoPrivAction);
11324     CodeGen(CGF);
11325   }
11326 
11327   if (IfCond) {
11328     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11329   } else {
11330     RegionCodeGenTy RCG(EndThenGen);
11331     RCG(CGF);
11332   }
11333 }
11334 
/// Emit the runtime call implementing a standalone target data directive:
/// 'target enter data', 'target exit data', or 'target update'. The map
/// arrays are materialized first (TargetThenGen), then the actual runtime
/// call (ThenGen) is emitted either inlined or wrapped in an outer task when
/// 'depend'/'nowait' clauses require one. An 'if' clause guards the whole
/// emission; the else-branch is a no-op.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two array pointers are captured by reference by both
  // lambdas below; TargetThenGen fills them in before ThenGen reads them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_*_mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are ruled out by the assert above; they
    // are spelled out (rather than folded into 'default') so that adding a
    // new OpenMP directive kind produces a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the map arrays for the directive and then dispatches ThenGen,
  // either inside an outer task (depend/nowait) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the array addresses to the captures read by ThenGen above.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // If the 'if' clause evaluates to false nothing is emitted (empty else).
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11514 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// OpenMP classification of the parameter; parameters default to Vector
    /// until a uniform/linear/aligned clause says otherwise.
    ParamKindTy Kind = Vector;
    /// Linear step of the parameter; consumers treat 1 as the implicit
    /// default step. NOTE(review): for LinearWithVarStride this appears to
    /// hold a clause argument rather than a literal stride — confirm at uses.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; a zero value means "unspecified"
    /// (mangling code only emits it when non-zero).
    llvm::APSInt Alignment;
  };
} // namespace
11525 
11526 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11527                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11528   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11529   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11530   // of that clause. The VLEN value must be power of 2.
11531   // In other case the notion of the function`s "characteristic data type" (CDT)
11532   // is used to compute the vector length.
11533   // CDT is defined in the following order:
11534   //   a) For non-void function, the CDT is the return type.
11535   //   b) If the function has any non-uniform, non-linear parameters, then the
11536   //   CDT is the type of the first such parameter.
11537   //   c) If the CDT determined by a) or b) above is struct, union, or class
11538   //   type which is pass-by-value (except for the type that maps to the
11539   //   built-in complex data type), the characteristic data type is int.
11540   //   d) If none of the above three cases is applicable, the CDT is int.
11541   // The VLEN is then determined based on the CDT and the size of vector
11542   // register of that ISA for which current vector version is generated. The
11543   // VLEN is computed using the formula below:
11544   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11545   // where vector register size specified in section 3.2.1 Registers and the
11546   // Stack Frame of original AMD64 ABI document.
11547   QualType RetType = FD->getReturnType();
11548   if (RetType.isNull())
11549     return 0;
11550   ASTContext &C = FD->getASTContext();
11551   QualType CDT;
11552   if (!RetType.isNull() && !RetType->isVoidType()) {
11553     CDT = RetType;
11554   } else {
11555     unsigned Offset = 0;
11556     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11557       if (ParamAttrs[Offset].Kind == Vector)
11558         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11559       ++Offset;
11560     }
11561     if (CDT.isNull()) {
11562       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11563         if (ParamAttrs[I + Offset].Kind == Vector) {
11564           CDT = FD->getParamDecl(I)->getType();
11565           break;
11566         }
11567       }
11568     }
11569   }
11570   if (CDT.isNull())
11571     CDT = C.IntTy;
11572   CDT = CDT->getCanonicalTypeUnqualified();
11573   if (CDT->isRecordType() || CDT->isUnionType())
11574     CDT = C.IntTy;
11575   return C.getTypeSize(CDT);
11576 }
11577 
11578 static void
11579 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11580                            const llvm::APSInt &VLENVal,
11581                            ArrayRef<ParamAttrTy> ParamAttrs,
11582                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11583   struct ISADataTy {
11584     char ISA;
11585     unsigned VecRegSize;
11586   };
11587   ISADataTy ISAData[] = {
11588       {
11589           'b', 128
11590       }, // SSE
11591       {
11592           'c', 256
11593       }, // AVX
11594       {
11595           'd', 256
11596       }, // AVX2
11597       {
11598           'e', 512
11599       }, // AVX512
11600   };
11601   llvm::SmallVector<char, 2> Masked;
11602   switch (State) {
11603   case OMPDeclareSimdDeclAttr::BS_Undefined:
11604     Masked.push_back('N');
11605     Masked.push_back('M');
11606     break;
11607   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11608     Masked.push_back('N');
11609     break;
11610   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11611     Masked.push_back('M');
11612     break;
11613   }
11614   for (char Mask : Masked) {
11615     for (const ISADataTy &Data : ISAData) {
11616       SmallString<256> Buffer;
11617       llvm::raw_svector_ostream Out(Buffer);
11618       Out << "_ZGV" << Data.ISA << Mask;
11619       if (!VLENVal) {
11620         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11621         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11622         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11623       } else {
11624         Out << VLENVal;
11625       }
11626       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11627         switch (ParamAttr.Kind){
11628         case LinearWithVarStride:
11629           Out << 's' << ParamAttr.StrideOrArg;
11630           break;
11631         case Linear:
11632           Out << 'l';
11633           if (ParamAttr.StrideOrArg != 1)
11634             Out << ParamAttr.StrideOrArg;
11635           break;
11636         case Uniform:
11637           Out << 'u';
11638           break;
11639         case Vector:
11640           Out << 'v';
11641           break;
11642         }
11643         if (!!ParamAttr.Alignment)
11644           Out << 'a' << ParamAttr.Alignment;
11645       }
11646       Out << '_' << Fn->getName();
11647       Fn->addFnAttr(Out.str());
11648     }
11649   }
11650 }
11651 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11657 
11658 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11659 ///
11660 /// TODO: Need to implement the behavior for reference marked with a
11661 /// var or no linear modifiers (1.b in the section). For this, we
11662 /// need to extend ParamKindTy to support the linear modifiers.
11663 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11664   QT = QT.getCanonicalType();
11665 
11666   if (QT->isVoidType())
11667     return false;
11668 
11669   if (Kind == ParamKindTy::Uniform)
11670     return false;
11671 
11672   if (Kind == ParamKindTy::Linear)
11673     return false;
11674 
11675   // TODO: Handle linear references with modifiers
11676 
11677   if (Kind == ParamKindTy::LinearWithVarStride)
11678     return false;
11679 
11680   return true;
11681 }
11682 
11683 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11684 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11685   QT = QT.getCanonicalType();
11686   unsigned Size = C.getTypeSize(QT);
11687 
11688   // Only scalars and complex within 16 bytes wide set PVB to true.
11689   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11690     return false;
11691 
11692   if (QT->isFloatingType())
11693     return true;
11694 
11695   if (QT->isIntegerType())
11696     return true;
11697 
11698   if (QT->isPointerType())
11699     return true;
11700 
11701   // TODO: Add support for complex types (section 3.1.2, item 2).
11702 
11703   return false;
11704 }
11705 
11706 /// Computes the lane size (LS) of a return type or of an input parameter,
11707 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11708 /// TODO: Add support for references, section 3.2.1, item 1.
11709 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11710   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11711     QualType PTy = QT.getCanonicalType()->getPointeeType();
11712     if (getAArch64PBV(PTy, C))
11713       return C.getTypeSize(PTy);
11714   }
11715   if (getAArch64PBV(QT, C))
11716     return C.getTypeSize(QT);
11717 
11718   return C.getTypeSize(C.getUIntPtrType());
11719 }
11720 
11721 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11722 // signature of the scalar function, as defined in 3.2.2 of the
11723 // AAVFABI.
11724 static std::tuple<unsigned, unsigned, bool>
11725 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11726   QualType RetType = FD->getReturnType().getCanonicalType();
11727 
11728   ASTContext &C = FD->getASTContext();
11729 
11730   bool OutputBecomesInput = false;
11731 
11732   llvm::SmallVector<unsigned, 8> Sizes;
11733   if (!RetType->isVoidType()) {
11734     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11735     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11736       OutputBecomesInput = true;
11737   }
11738   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11739     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11740     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11741   }
11742 
11743   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11744   // The LS of a function parameter / return value can only be a power
11745   // of 2, starting from 8 bits, up to 128.
11746   assert(llvm::all_of(Sizes,
11747                       [](unsigned Size) {
11748                         return Size == 8 || Size == 16 || Size == 32 ||
11749                                Size == 64 || Size == 128;
11750                       }) &&
11751          "Invalid size");
11752 
11753   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11754                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11755                          OutputBecomesInput);
11756 }
11757 
11758 /// Mangle the parameter part of the vector function name according to
11759 /// their OpenMP classification. The mangling function is defined in
11760 /// section 3.5 of the AAVFABI.
11761 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11762   SmallString<256> Buffer;
11763   llvm::raw_svector_ostream Out(Buffer);
11764   for (const auto &ParamAttr : ParamAttrs) {
11765     switch (ParamAttr.Kind) {
11766     case LinearWithVarStride:
11767       Out << "ls" << ParamAttr.StrideOrArg;
11768       break;
11769     case Linear:
11770       Out << 'l';
11771       // Don't print the step value if it is not present or if it is
11772       // equal to 1.
11773       if (ParamAttr.StrideOrArg != 1)
11774         Out << ParamAttr.StrideOrArg;
11775       break;
11776     case Uniform:
11777       Out << 'u';
11778       break;
11779     case Vector:
11780       Out << 'v';
11781       break;
11782     }
11783 
11784     if (!!ParamAttr.Alignment)
11785       Out << 'a' << ParamAttr.Alignment;
11786   }
11787 
11788   return std::string(Out.str());
11789 }
11790 
11791 // Function used to add the attribute. The parameter `VLEN` is
11792 // templated to allow the use of "x" when targeting scalable functions
11793 // for SVE.
11794 template <typename T>
11795 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11796                                  char ISA, StringRef ParSeq,
11797                                  StringRef MangledName, bool OutputBecomesInput,
11798                                  llvm::Function *Fn) {
11799   SmallString<256> Buffer;
11800   llvm::raw_svector_ostream Out(Buffer);
11801   Out << Prefix << ISA << LMask << VLEN;
11802   if (OutputBecomesInput)
11803     Out << "v";
11804   Out << ParSeq << "_" << MangledName;
11805   Fn->addFnAttr(Out.str());
11806 }
11807 
11808 // Helper function to generate the Advanced SIMD names depending on
11809 // the value of the NDS when simdlen is not present.
11810 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11811                                       StringRef Prefix, char ISA,
11812                                       StringRef ParSeq, StringRef MangledName,
11813                                       bool OutputBecomesInput,
11814                                       llvm::Function *Fn) {
11815   switch (NDS) {
11816   case 8:
11817     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11818                          OutputBecomesInput, Fn);
11819     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11820                          OutputBecomesInput, Fn);
11821     break;
11822   case 16:
11823     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11824                          OutputBecomesInput, Fn);
11825     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11826                          OutputBecomesInput, Fn);
11827     break;
11828   case 32:
11829     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11830                          OutputBecomesInput, Fn);
11831     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11832                          OutputBecomesInput, Fn);
11833     break;
11834   case 64:
11835   case 128:
11836     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11837                          OutputBecomesInput, Fn);
11838     break;
11839   default:
11840     llvm_unreachable("Scalar type is too wide.");
11841   }
11842 }
11843 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \param UserVLEN value of the 'simdlen' clause, or 0 when absent.
/// \param State the '[not]inbranch' clause state, selecting masked ('M')
///        and/or unmasked ('N') variants.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE (per the checks below).
/// Emits warnings (and generates nothing) for simdlen values that the
/// target cannot honor.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total width (VLEN * widest lane) at most 2048 bits and a
  // multiple of 128 bits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable ("x") masked variant.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11952 
11953 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11954                                               llvm::Function *Fn) {
11955   ASTContext &C = CGM.getContext();
11956   FD = FD->getMostRecentDecl();
11957   // Map params to their positions in function decl.
11958   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11959   if (isa<CXXMethodDecl>(FD))
11960     ParamPositions.try_emplace(FD, 0);
11961   unsigned ParamPos = ParamPositions.size();
11962   for (const ParmVarDecl *P : FD->parameters()) {
11963     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11964     ++ParamPos;
11965   }
11966   while (FD) {
11967     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11968       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11969       // Mark uniform parameters.
11970       for (const Expr *E : Attr->uniforms()) {
11971         E = E->IgnoreParenImpCasts();
11972         unsigned Pos;
11973         if (isa<CXXThisExpr>(E)) {
11974           Pos = ParamPositions[FD];
11975         } else {
11976           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11977                                 ->getCanonicalDecl();
11978           Pos = ParamPositions[PVD];
11979         }
11980         ParamAttrs[Pos].Kind = Uniform;
11981       }
11982       // Get alignment info.
11983       auto NI = Attr->alignments_begin();
11984       for (const Expr *E : Attr->aligneds()) {
11985         E = E->IgnoreParenImpCasts();
11986         unsigned Pos;
11987         QualType ParmTy;
11988         if (isa<CXXThisExpr>(E)) {
11989           Pos = ParamPositions[FD];
11990           ParmTy = E->getType();
11991         } else {
11992           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11993                                 ->getCanonicalDecl();
11994           Pos = ParamPositions[PVD];
11995           ParmTy = PVD->getType();
11996         }
11997         ParamAttrs[Pos].Alignment =
11998             (*NI)
11999                 ? (*NI)->EvaluateKnownConstInt(C)
12000                 : llvm::APSInt::getUnsigned(
12001                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12002                           .getQuantity());
12003         ++NI;
12004       }
12005       // Mark linear parameters.
12006       auto SI = Attr->steps_begin();
12007       auto MI = Attr->modifiers_begin();
12008       for (const Expr *E : Attr->linears()) {
12009         E = E->IgnoreParenImpCasts();
12010         unsigned Pos;
12011         // Rescaling factor needed to compute the linear parameter
12012         // value in the mangled name.
12013         unsigned PtrRescalingFactor = 1;
12014         if (isa<CXXThisExpr>(E)) {
12015           Pos = ParamPositions[FD];
12016         } else {
12017           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12018                                 ->getCanonicalDecl();
12019           Pos = ParamPositions[PVD];
12020           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12021             PtrRescalingFactor = CGM.getContext()
12022                                      .getTypeSizeInChars(P->getPointeeType())
12023                                      .getQuantity();
12024         }
12025         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12026         ParamAttr.Kind = Linear;
12027         // Assuming a stride of 1, for `linear` without modifiers.
12028         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12029         if (*SI) {
12030           Expr::EvalResult Result;
12031           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12032             if (const auto *DRE =
12033                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12034               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12035                 ParamAttr.Kind = LinearWithVarStride;
12036                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12037                     ParamPositions[StridePVD->getCanonicalDecl()]);
12038               }
12039             }
12040           } else {
12041             ParamAttr.StrideOrArg = Result.Val.getInt();
12042           }
12043         }
12044         // If we are using a linear clause on a pointer, we need to
12045         // rescale the value of linear_step with the byte size of the
12046         // pointee type.
12047         if (Linear == ParamAttr.Kind)
12048           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12049         ++SI;
12050         ++MI;
12051       }
12052       llvm::APSInt VLENVal;
12053       SourceLocation ExprLoc;
12054       const Expr *VLENExpr = Attr->getSimdlen();
12055       if (VLENExpr) {
12056         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12057         ExprLoc = VLENExpr->getExprLoc();
12058       }
12059       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12060       if (CGM.getTriple().isX86()) {
12061         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12062       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12063         unsigned VLEN = VLENVal.getExtValue();
12064         StringRef MangledName = Fn->getName();
12065         if (CGM.getTarget().hasFeature("sve"))
12066           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12067                                          MangledName, 's', 128, Fn, ExprLoc);
12068         if (CGM.getTarget().hasFeature("neon"))
12069           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12070                                          MangledName, 'n', 128, Fn, ExprLoc);
12071       }
12072     }
12073     FD = FD->getPreviousDecl();
12074   }
12075 }
12076 
12077 namespace {
12078 /// Cleanup action for doacross support.
12079 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12080 public:
12081   static const int DoacrossFinArgs = 2;
12082 
12083 private:
12084   llvm::FunctionCallee RTLFn;
12085   llvm::Value *Args[DoacrossFinArgs];
12086 
12087 public:
12088   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12089                     ArrayRef<llvm::Value *> CallArgs)
12090       : RTLFn(RTLFn) {
12091     assert(CallArgs.size() == DoacrossFinArgs);
12092     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12093   }
12094   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12095     if (!CGF.HaveInsertPoint())
12096       return;
12097     CGF.EmitRuntimeCall(RTLFn, Args);
12098   }
12099 };
12100 } // namespace
12101 
// Emits the doacross loop initialization: builds an array of kmp_dim loop
// descriptors for the associated loops and calls __kmpc_doacross_init; a
// cleanup for __kmpc_doacross_fini is pushed for region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per associated loop.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the array; 'lo' stays 0 and only 'up'/'st' are written.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Iteration counts are converted to kmp_int64 before being stored.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup calling __kmpc_doacross_fini(loc, gtid) at region exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12172 
12173 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12174                                           const OMPDependClause *C) {
12175   QualType Int64Ty =
12176       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12177   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12178   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12179       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12180   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12181   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12182     const Expr *CounterVal = C->getLoopData(I);
12183     assert(CounterVal);
12184     llvm::Value *CntVal = CGF.EmitScalarConversion(
12185         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12186         CounterVal->getExprLoc());
12187     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12188                           /*Volatile=*/false, Int64Ty);
12189   }
12190   llvm::Value *Args[] = {
12191       emitUpdateLocation(CGF, C->getBeginLoc()),
12192       getThreadID(CGF, C->getBeginLoc()),
12193       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12194   llvm::FunctionCallee RTLFn;
12195   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12196     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12197                                                   OMPRTL___kmpc_doacross_post);
12198   } else {
12199     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12200     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12201                                                   OMPRTL___kmpc_doacross_wait);
12202   }
12203   CGF.EmitRuntimeCall(RTLFn, Args);
12204 }
12205 
12206 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12207                                llvm::FunctionCallee Callee,
12208                                ArrayRef<llvm::Value *> Args) const {
12209   assert(Loc.isValid() && "Outlined function call location must be valid.");
12210   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12211 
12212   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12213     if (Fn->doesNotThrow()) {
12214       CGF.EmitNounwindRuntimeCall(Fn, Args);
12215       return;
12216     }
12217   }
12218   CGF.EmitRuntimeCall(Callee, Args);
12219 }
12220 
// Emits a call to an OpenMP outlined function. The base implementation is a
// thin forwarder to emitCall; kept as a separate entry point so that the
// outlined-call lowering can be customized (NOTE(review): presumably by
// derived runtimes — confirm against the class declaration in the header).
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12226 
12227 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12228   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12229     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12230       HasEmittedDeclareTargetRegion = true;
12231 }
12232 
// Returns the address to use for \p NativeParam inside the current function.
// The base implementation treats native and target parameters as identical
// and returns the local address of the native parameter; \p TargetParam is
// intentionally unused here (NOTE(review): overriding runtimes presumably
// translate between the two — confirm against the header).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12238 
// Returns the address to use for local variable \p VD, handling two special
// cases: variables of untied tasks (which live in per-task storage) and
// variables with an 'omp allocate' attribute (which are heap-allocated via
// __kmpc_alloc and freed via a __kmpc_free cleanup). Returns an invalid
// address when no special handling applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the pair of
  // addresses recorded for this variable.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like type: size is only known at run time.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Call void *__kmpc_alloc(gtid, size, allocator) and cast the returned
    // void* to a pointer to the variable's type.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, store the allocated pointer into the task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Emit __kmpc_free(gtid, ptr, allocator) at scope exit.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For untied tasks, emit a task switch point after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12342 
12343 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12344                                              const VarDecl *VD) const {
12345   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12346   if (It == FunctionToUntiedTaskStackMap.end())
12347     return false;
12348   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12349 }
12350 
12351 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12352     CodeGenModule &CGM, const OMPLoopDirective &S)
12353     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12354   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12355   if (!NeedToPush)
12356     return;
12357   NontemporalDeclsSet &DS =
12358       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12359   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12360     for (const Stmt *Ref : C->private_refs()) {
12361       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12362       const ValueDecl *VD;
12363       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12364         VD = DRE->getDecl();
12365       } else {
12366         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12367         assert((ME->isImplicitCXXThis() ||
12368                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12369                "Expected member of current class.");
12370         VD = ME->getMemberDecl();
12371       }
12372       DS.insert(VD);
12373     }
12374   }
12375 }
12376 
12377 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12378   if (!NeedToPush)
12379     return;
12380   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12381 }
12382 
12383 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12384     CodeGenFunction &CGF,
12385     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12386                           std::pair<Address, Address>> &LocalVars)
12387     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12388   if (!NeedToPush)
12389     return;
12390   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12391       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12392   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12393 }
12394 
12395 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12396   if (!NeedToPush)
12397     return;
12398   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12399 }
12400 
12401 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12402   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12403 
12404   return llvm::any_of(
12405       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12406       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12407 }
12408 
12409 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12410     const OMPExecutableDirective &S,
12411     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12412     const {
12413   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12414   // Vars in target/task regions must be excluded completely.
12415   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12416       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12417     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12418     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12419     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12420     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12421       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12422         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12423     }
12424   }
12425   // Exclude vars in private clauses.
12426   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12427     for (const Expr *Ref : C->varlists()) {
12428       if (!Ref->getType()->isScalarType())
12429         continue;
12430       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12431       if (!DRE)
12432         continue;
12433       NeedToCheckForLPCs.insert(DRE->getDecl());
12434     }
12435   }
12436   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12437     for (const Expr *Ref : C->varlists()) {
12438       if (!Ref->getType()->isScalarType())
12439         continue;
12440       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12441       if (!DRE)
12442         continue;
12443       NeedToCheckForLPCs.insert(DRE->getDecl());
12444     }
12445   }
12446   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12447     for (const Expr *Ref : C->varlists()) {
12448       if (!Ref->getType()->isScalarType())
12449         continue;
12450       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12451       if (!DRE)
12452         continue;
12453       NeedToCheckForLPCs.insert(DRE->getDecl());
12454     }
12455   }
12456   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12457     for (const Expr *Ref : C->varlists()) {
12458       if (!Ref->getType()->isScalarType())
12459         continue;
12460       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12461       if (!DRE)
12462         continue;
12463       NeedToCheckForLPCs.insert(DRE->getDecl());
12464     }
12465   }
12466   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12467     for (const Expr *Ref : C->varlists()) {
12468       if (!Ref->getType()->isScalarType())
12469         continue;
12470       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12471       if (!DRE)
12472         continue;
12473       NeedToCheckForLPCs.insert(DRE->getDecl());
12474     }
12475   }
12476   for (const Decl *VD : NeedToCheckForLPCs) {
12477     for (const LastprivateConditionalData &Data :
12478          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12479       if (Data.DeclToUniqueName.count(VD) > 0) {
12480         if (!Data.Disabled)
12481           NeedToAddForLPCsAsDisabled.insert(VD);
12482         break;
12483       }
12484     }
12485   }
12486 }
12487 
// Push constructor: if the directive has a 'lastprivate(conditional:)'
// clause (OpenMP >= 5.0), records the tracked decls, the loop iteration
// variable lvalue, and the current function on the runtime's
// LastprivateConditionalStack; the destructor pops the record.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record every var listed in a conditional lastprivate clause together
  // with a unique name (used to name the helper globals).
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12519 
// Disable constructor (used via disable()): if the inner directive \p S
// privatizes/captures decls tracked by enclosing lastprivate conditional
// regions, pushes a record marked Disabled for those decls so lookups stop
// there; otherwise leaves the stack untouched (Action stays DoNotPush).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // The pushed record carries empty unique names — it only marks the decls
    // as disabled for the inner region.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12538 
// Named factory that invokes the "disable" constructor above, suppressing
// lastprivate conditional analysis inside the region of \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12544 
12545 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12546   if (CGM.getLangOpts().OpenMP < 50)
12547     return;
12548   if (Action == ActionToDo::DisableLastprivateConditional) {
12549     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12550            "Expected list of disabled private vars.");
12551     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12552   }
12553   if (Action == ActionToDo::PushAsLastprivateConditional) {
12554     assert(
12555         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12556         "Expected list of lastprivate conditional vars.");
12557     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12558   }
12559 }
12560 
// Creates (or reuses) the per-function private storage for a lastprivate
// conditional variable: a record { value, fired-flag }. Resets the fired
// flag to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, fields, base lvalue) tuples per decl.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the record type and a local temp.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the previously created record/temp for this decl.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12595 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  // Stack of active lastprivate conditional regions (outermost first).
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Data captured for the innermost matching reference, if any.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  // Matches a direct reference to a tracked variable. Searches the regions
  // innermost-first; a Disabled record stops the search without a match.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Matches a tracked member of the current object (this->member).
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Generic traversal: only descend into glvalue children (a reference must
  // be an lvalue use to count); returns true on the first match.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  // Returns (expr, decl, unique name, IV lvalue, function) of the match.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12666 
/// Emits the conditional update of the global copy of a lastprivate
/// conditional variable: creates (or reuses) two internal global variables —
/// one holding the loop-iteration number at which the variable was last
/// updated, one holding the last stored value — and, inside a critical
/// section named after \p UniqueDeclName (unless compiling in simd-only
/// mode), stores the current iteration number and private value whenever the
/// stored iteration number is <= the current one. The value global is read
/// back later by emitLastprivateConditionalFinalUpdate().
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  // getOrCreateInternalVariable may return an existing global; force the
  // alignment expected for the IV type in either case.
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // IVVal is captured by value: it was loaded outside the critical region.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick a signed or unsigned comparison based on the IV's source type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be stored; aggregates are rejected
    // below (they are not supported for lastprivate conditionals).
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): this temporary is destroyed at the end of the statement,
    // so the empty debug location does not cover the EmitBlock call below —
    // confirm this is the intended scope.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12753 
12754 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12755                                                          const Expr *LHS) {
12756   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12757     return;
12758   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12759   if (!Checker.Visit(LHS))
12760     return;
12761   const Expr *FoundE;
12762   const Decl *FoundD;
12763   StringRef UniqueDeclName;
12764   LValue IVLVal;
12765   llvm::Function *FoundFn;
12766   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12767       Checker.getFoundData();
12768   if (FoundFn != CGF.CurFn) {
12769     // Special codegen for inner parallel regions.
12770     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12771     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12772     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12773            "Lastprivate conditional is not found in outer region.");
12774     QualType StructTy = std::get<0>(It->getSecond());
12775     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12776     LValue PrivLVal = CGF.EmitLValue(FoundE);
12777     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12778         PrivLVal.getAddress(CGF),
12779         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12780     LValue BaseLVal =
12781         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12782     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12783     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12784                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12785                         FiredLVal, llvm::AtomicOrdering::Unordered,
12786                         /*IsVolatile=*/true, /*isInit=*/false);
12787     return;
12788   }
12789 
12790   // Private address of the lastprivate conditional in the current context.
12791   // priv_a
12792   LValue LVal = CGF.EmitLValue(FoundE);
12793   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12794                                    FoundE->getExprLoc());
12795 }
12796 
12797 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12798     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12799     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12800   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12801     return;
12802   auto Range = llvm::reverse(LastprivateConditionalStack);
12803   auto It = llvm::find_if(
12804       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12805   if (It == Range.end() || It->Fn != CGF.CurFn)
12806     return;
12807   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12808   assert(LPCI != LastprivateConditionalToTypes.end() &&
12809          "Lastprivates must be registered already.");
12810   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12811   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12812   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12813   for (const auto &Pair : It->DeclToUniqueName) {
12814     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12815     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12816       continue;
12817     auto I = LPCI->getSecond().find(Pair.first);
12818     assert(I != LPCI->getSecond().end() &&
12819            "Lastprivate must be rehistered already.");
12820     // bool Cmp = priv_a.Fired != 0;
12821     LValue BaseLVal = std::get<3>(I->getSecond());
12822     LValue FiredLVal =
12823         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12824     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12825     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12826     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12827     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12828     // if (Cmp) {
12829     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12830     CGF.EmitBlock(ThenBB);
12831     Address Addr = CGF.GetAddrOfLocalVar(VD);
12832     LValue LVal;
12833     if (VD->getType()->isReferenceType())
12834       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12835                                            AlignmentSource::Decl);
12836     else
12837       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12838                                 AlignmentSource::Decl);
12839     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12840                                      D.getBeginLoc());
12841     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12842     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12843     // }
12844   }
12845 }
12846 
12847 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12848     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12849     SourceLocation Loc) {
12850   if (CGF.getLangOpts().OpenMP < 50)
12851     return;
12852   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12853   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12854          "Unknown lastprivate conditional variable.");
12855   StringRef UniqueName = It->second;
12856   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12857   // The variable was not updated in the region - exit.
12858   if (!GV)
12859     return;
12860   LValue LPLVal = CGF.MakeAddrLValue(
12861       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12862   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12863   CGF.EmitStoreOfScalar(Res, PrivLVal);
12864 }
12865 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime class used when compiling in SIMD-only mode.
// The entry points below (outlining, parallel/teams/task calls, and
// synchronization regions) would require the full OpenMP runtime and must
// never be reached in this mode.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12947 
// SIMD-only mode stubs (continued): worksharing-loop scheduling and
// num_threads/proc_bind clauses require the OpenMP runtime and are
// unreachable in this mode.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12999 
// SIMD-only mode stubs (continued): threadprivate variables, flushes, and
// task/taskloop calls require the OpenMP runtime and are unreachable in this
// mode.

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13040 
// Reductions are the one construct with a real implementation in SIMD-only
// mode: only the simple-reduction form can be requested here, and it is
// delegated to the common CGOpenMPRuntime implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13049 
// SIMD-only mode stubs (continued): task reductions, taskwait, and
// cancellation require the OpenMP runtime and are unreachable in this mode.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13093 
// SIMD-only mode stubs (continued): target offloading and teams constructs
// require the OpenMP runtime and are unreachable in this mode.

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // No target codegen happens in SIMD-only mode; never claim the global so
  // that it is emitted through the normal host path.
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13137 
// SIMD-only mode stubs (continued): target data mapping, doacross ordering,
// and target parameter translation require the OpenMP runtime and are
// unreachable in this mode.

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13173